import torch
torch.backends.cuda.matmul.allow_tf32 = True
import torch.nn as nn
import transformers
from utils import get_local_dir, get_local_run_dir, disable_dropout, init_distributed
import os
import hydra
import torch.distributed as dist
import torch.multiprocessing as mp
from omegaconf import OmegaConf, DictConfig
import mask_trainers as trainers
import wandb
import json
import socket
from typing import Optional, Set
from huggingface_hub import login
from peft import LoraConfig, PeftModel, get_peft_model
from collections import defaultdict
import random
from typing import List, Optional

# Set the torch.distributed debug level to OFF.
dist.set_debug_level(dist.DebugLevel.OFF)
# Register get_local_run_dir as an OmegaConf resolver under the same name, so config
# values can invoke it with (exp_name, local_dirs).
OmegaConf.register_new_resolver("get_local_run_dir", lambda exp_name, local_dirs: get_local_run_dir(exp_name, local_dirs))
# Global record: the key is a layer index, the value is a list holding every expert id
# that was selected by that layer's sparse MoE block (appended to by MoEHook).
moe_records = defaultdict(list)

class MoEHook:
    """Callable forward hook that records which experts a hooked module selected.

    Each call reads ``module.selected_experts``, flattens it, and appends the
    resulting ids to the module-level ``moe_records`` mapping under this
    hook's ``layer_idx``.
    """

    def __init__(self, layer_idx):
        # Key under which this hook stores its selections in ``moe_records``.
        self.layer_idx = layer_idx

    def __call__(self, module, inputs, output):
        """Append the flattened expert ids of ``module`` to ``moe_records``.

        ``inputs`` and ``output`` are part of the hook call signature but are
        not used here.
        """
        # Per the original author's note, ``selected_experts`` is expected to be
        # a tensor of shape [batch, seq_len, k] -- TODO confirm against the
        # model implementation that sets this attribute.
        with torch.no_grad():
            chosen = module.selected_experts.reshape(-1)
            expert_ids = chosen.cpu().tolist()
        moe_records[self.layer_idx].extend(expert_ids)

def _expert_module_names(layer, expert_ids):
    """Expand the expert ids of one layer into gate/up/down projection module names."""
    names = []
    for expert_id in expert_ids:
        names.extend([
            f"{layer}.mlp.experts.{expert_id}.gate_proj",
            f"{layer}.mlp.experts.{expert_id}.up_proj",
            f"{layer}.mlp.experts.{expert_id}.down_proj",
        ])
    return names


def _rank_expert_usage(records, k=8):
    """Return ``(most_used, least_used)`` module-name lists built from ``records``.

    :param records: mapping of layer index -> list of selected expert ids.
    :param k: how many experts to keep per layer at each end of the ranking.
    :return: two flat lists of module names, with layers in ascending order.
    """
    # Counter is not imported at file level, so it is imported locally.
    from collections import Counter

    most_used, least_used = [], []
    for layer in sorted(records):
        # 1. Count how many times each expert was selected in this layer.
        counts = Counter(records[layer])
        # 2. The k most frequently selected experts.
        top = [expert_id for expert_id, _ in counts.most_common(k)]
        # 3. The k least frequently selected experts. Counter has no
        #    least_common(), so sort ascending by count (stable) and slice.
        # NOTE(review): experts that were never selected do not appear in the
        # Counter at all, so they can never show up in this "least used" list.
        bottom = [expert_id for expert_id, _ in sorted(counts.items(), key=lambda item: item[1])[:k]]
        most_used.extend(_expert_module_names(layer, top))
        least_used.extend(_expert_module_names(layer, bottom))
    return most_used, least_used


def worker_main(rank: int, world_size: int, config: DictConfig, policy: nn.Module, reference_model: Optional[nn.Module] = None):
    """Main function for each worker process (may be only 1 for BasicTrainer/TensorParallelTrainer).

    Steps, in order:
      1. Call ``init_distributed`` when the trainer name contains ``'FSDP'``.
      2. On rank 0, start a wandb run if ``config.wandb.enabled``.
      3. Build the trainer class named by ``config.trainer`` and run ``train()``.
      4. On rank 0, print the 8 most and 8 least selected experts per layer,
         based on the module-level ``moe_records``.
      5. Finish the wandb run (rank 0 only) and destroy the process group if
         one was initialized.

    :param rank: index of this worker process.
    :param world_size: total number of worker processes.
    :param config: resolved run configuration.
    :param policy: model handed to the trainer as the policy.
    :param reference_model: optional model forwarded to the trainer.
    """
    if 'FSDP' in config.trainer:
        init_distributed(rank, world_size, port=config.fsdp_port)

    if rank == 0 and config.wandb.enabled:
        os.environ['WANDB_CACHE_DIR'] = get_local_dir(config.local_dirs)
        wandb.init(
            entity=config.wandb.entity,
            project=config.wandb.project,
            config=OmegaConf.to_container(config),
            dir=get_local_dir(config.local_dirs),
            name=config.exp_name,
        )

    TrainerClass = getattr(trainers, config.trainer)
    print(f'Creating trainer on process {rank} with world size {world_size}')
    trainer = TrainerClass(policy, config, config.seed, config.local_run_dir, reference_model=reference_model, rank=rank, world_size=world_size)

    trainer.train()

    # Expert-usage statistics: only rank 0 reports, using what this process's
    # hooks recorded in moe_records.
    if rank == 0:
        top8_name, bottom8_name = _rank_expert_usage(moe_records, k=8)
        print("top8 is ", top8_name)
        print("bottom8 is", bottom8_name)

    # Close wandb (rank 0 only): flushes and stops its background threads.
    if rank == 0 and config.wandb.enabled and wandb.run is not None:
        wandb.finish()

    # Destroy the distributed process group to release NCCL resources.
    if dist.is_initialized():
        dist.destroy_process_group()

def get_target_modules(
    target: Optional[List[int]] = None,
    num_experts: int = 64,
    select_k: int = 8,
    proj_names: Optional[List[str]] = None,
) -> List[str]:
    """
    Build a list of module-name suffixes of the form ``"<idx>.<proj>"``.

    - If ``target`` is non-empty, its indices are used as given (they are not
      validated against ``num_experts`` or ``select_k``).
    - If ``target`` is empty or ``None``, ``select_k`` distinct indices are
      drawn at random from ``range(num_experts)``.
    For every index ``i`` one entry per projection name is produced, e.g.
    ``['i.gate_proj', 'i.up_proj', 'i.down_proj']`` with the defaults.

    :param target: expert indices chosen by the caller; random choice when empty
    :param num_experts: total number of experts to sample from, default 64
    :param select_k: number of experts to sample when ``target`` is empty, default 8
    :param proj_names: projection names appended to each index; defaults to
        ``['gate_proj', 'up_proj', 'down_proj']``
    :return: a list such as ``['0.gate_proj', '0.up_proj', ..., '7.down_proj']``
    :raises ValueError: from ``random.sample`` when ``select_k`` is negative or
        larger than ``num_experts`` (only on the random path)
    """
    if proj_names is None:
        proj_names = ["gate_proj", "up_proj", "down_proj"]

    # No indices supplied (None or empty list): sample select_k experts.
    if not target:
        target = random.sample(range(num_experts), select_k)

    # Index-major order: all projections of one expert before the next expert.
    return [f"{idx}.{proj}" for idx in target for proj in proj_names]



@hydra.main(version_base=None, config_path="../config", config_name="config")
def main(config: DictConfig):
    """Main entry point for training. Validates config, creates/initializes model(s), and kicks off worker process(es)."""

    # Resolve hydra references, e.g. so we don't re-compute the run directory
    OmegaConf.resolve(config)

    missing_keys: Set[str] = OmegaConf.missing_keys(config)
    if missing_keys:
        raise ValueError(f"Got missing keys in config:\n{missing_keys}")

    print(OmegaConf.to_yaml(config))

    config_path = os.path.join(config.local_run_dir, 'config.yaml')
    with open(config_path, 'w') as f:
        OmegaConf.save(config, f)

    print('=' * 140)
    print(f'Writing to {socket.gethostname()}:{config.local_run_dir}')
    print('=' * 140)

    os.environ['XDG_CACHE_HOME'] = get_local_dir(config.local_dirs)
    
    model_kwargs = {'device_map': 'balanced'} if config.trainer == 'BasicTrainer' else {}
    policy_dtype = getattr(torch, config.model.policy_dtype)
    
    load_path = config.model.name_or_path
    print('building policy from path', load_path)
    
    policy = transformers.AutoModelForCausalLM.from_pretrained(load_path, low_cpu_mem_usage=True, 
                                        use_cache=False, torch_dtype=policy_dtype, **model_kwargs)
    #insert hook to calculate experts 
    # #只统计第l层的信息
    # l = 7
    for idx, layer in enumerate(policy.model.layers):
        module = getattr(layer, "mlp", None)
        if module is not None and type(module).__name__ == "OlmoeSparseMoeBlock":
            module.register_forward_hook(MoEHook(idx))
            print(f"[Hook] Registered on model.layers[{idx}].mlp")

    #adapter parts
    tokenizer = transformers.AutoTokenizer.from_pretrained(load_path)
    if tokenizer.pad_token_id is None:
        tokenizer.add_special_tokens({'pad_token': '<PAD>'})
        policy.config.pad_token_id = tokenizer.pad_token_id
        policy.resize_token_embeddings(len(tokenizer))
    
    if config.model.archive is None:
        #target = [17,2,6,11,29,27,52,57]
        target = [49,26,53,30,3,16,50,14]
        target = get_target_modules(target)

        #include more modules 
        #target.extend(['gate'])
        target.extend(['q_proj','k_proj','v_proj','o_proj','gate'])
        #print("target modules are ", target)
        peft_config = LoraConfig(
                r=config.lora_rank,
                lora_alpha=config.lora_alpha,
                lora_dropout=0.05,
                bias="none",
                task_type="CAUSAL_LM",
                #target_modules=['k_proj', 'gate_proj', 'v_proj', 'up_proj', 'q_proj', 'o_proj', 'down_proj']
                #target_modules=['w1','w2','w3','gate']
                #target_modules=['w1','w2','w3']
                #target_modules= 'all-linear'
                #for olmoe-all
                #target_modules= ['gate_proj','up_proj','down_proj','gate']
                #target_modules= ['gate_proj','up_proj','down_proj']
                #target_modules= ['2.gate_proj','2.up_proj','2.down_proj','4.gate_proj','4.up_proj','4.down_proj','13.gate_proj','13.up_proj','13.down_proj','21.gate_proj','21.up_proj','21.down_proj','22.gate_proj','22.up_proj','22.down_proj','35.gate_proj','35.up_proj','35.down_proj','39.gate_proj','39.up_proj','39.down_proj','45.gate_proj','45.up_proj','45.down_proj']
                #target_modules = target
                #target_modules = ['0.mlp.experts.34.gate_proj', '0.mlp.experts.34.up_proj', '0.mlp.experts.34.down_proj', '0.mlp.experts.12.gate_proj', '0.mlp.experts.12.up_proj', '0.mlp.experts.12.down_proj', '0.mlp.experts.37.gate_proj', '0.mlp.experts.37.up_proj', '0.mlp.experts.37.down_proj', '0.mlp.experts.51.gate_proj', '0.mlp.experts.51.up_proj', '0.mlp.experts.51.down_proj', '0.mlp.experts.32.gate_proj', '0.mlp.experts.32.up_proj', '0.mlp.experts.32.down_proj', '0.mlp.experts.62.gate_proj', '0.mlp.experts.62.up_proj', '0.mlp.experts.62.down_proj', '0.mlp.experts.3.gate_proj', '0.mlp.experts.3.up_proj', '0.mlp.experts.3.down_proj', '0.mlp.experts.13.gate_proj', '0.mlp.experts.13.up_proj', '0.mlp.experts.13.down_proj', '1.mlp.experts.28.gate_proj', '1.mlp.experts.28.up_proj', '1.mlp.experts.28.down_proj', '1.mlp.experts.43.gate_proj', '1.mlp.experts.43.up_proj', '1.mlp.experts.43.down_proj', '1.mlp.experts.6.gate_proj', '1.mlp.experts.6.up_proj', '1.mlp.experts.6.down_proj', '1.mlp.experts.33.gate_proj', '1.mlp.experts.33.up_proj', '1.mlp.experts.33.down_proj', '1.mlp.experts.26.gate_proj', '1.mlp.experts.26.up_proj', '1.mlp.experts.26.down_proj', '1.mlp.experts.10.gate_proj', '1.mlp.experts.10.up_proj', '1.mlp.experts.10.down_proj', '1.mlp.experts.38.gate_proj', '1.mlp.experts.38.up_proj', '1.mlp.experts.38.down_proj', '1.mlp.experts.56.gate_proj', '1.mlp.experts.56.up_proj', '1.mlp.experts.56.down_proj', '2.mlp.experts.46.gate_proj', '2.mlp.experts.46.up_proj', '2.mlp.experts.46.down_proj', '2.mlp.experts.37.gate_proj', '2.mlp.experts.37.up_proj', '2.mlp.experts.37.down_proj', '2.mlp.experts.2.gate_proj', '2.mlp.experts.2.up_proj', '2.mlp.experts.2.down_proj', '2.mlp.experts.43.gate_proj', '2.mlp.experts.43.up_proj', '2.mlp.experts.43.down_proj', '2.mlp.experts.42.gate_proj', '2.mlp.experts.42.up_proj', '2.mlp.experts.42.down_proj', '2.mlp.experts.18.gate_proj', '2.mlp.experts.18.up_proj', '2.mlp.experts.18.down_proj', '2.mlp.experts.44.gate_proj', 
'2.mlp.experts.44.up_proj', '2.mlp.experts.44.down_proj', '2.mlp.experts.59.gate_proj', '2.mlp.experts.59.up_proj', '2.mlp.experts.59.down_proj', '3.mlp.experts.53.gate_proj', '3.mlp.experts.53.up_proj', '3.mlp.experts.53.down_proj', '3.mlp.experts.21.gate_proj', '3.mlp.experts.21.up_proj', '3.mlp.experts.21.down_proj', '3.mlp.experts.58.gate_proj', '3.mlp.experts.58.up_proj', '3.mlp.experts.58.down_proj', '3.mlp.experts.25.gate_proj', '3.mlp.experts.25.up_proj', '3.mlp.experts.25.down_proj', '3.mlp.experts.13.gate_proj', '3.mlp.experts.13.up_proj', '3.mlp.experts.13.down_proj', '3.mlp.experts.26.gate_proj', '3.mlp.experts.26.up_proj', '3.mlp.experts.26.down_proj', '3.mlp.experts.8.gate_proj', '3.mlp.experts.8.up_proj', '3.mlp.experts.8.down_proj', '3.mlp.experts.60.gate_proj', '3.mlp.experts.60.up_proj', '3.mlp.experts.60.down_proj', '4.mlp.experts.0.gate_proj', '4.mlp.experts.0.up_proj', '4.mlp.experts.0.down_proj', '4.mlp.experts.3.gate_proj', '4.mlp.experts.3.up_proj', '4.mlp.experts.3.down_proj', '4.mlp.experts.53.gate_proj', '4.mlp.experts.53.up_proj', '4.mlp.experts.53.down_proj', '4.mlp.experts.20.gate_proj', '4.mlp.experts.20.up_proj', '4.mlp.experts.20.down_proj', '4.mlp.experts.12.gate_proj', '4.mlp.experts.12.up_proj', '4.mlp.experts.12.down_proj', '4.mlp.experts.32.gate_proj', '4.mlp.experts.32.up_proj', '4.mlp.experts.32.down_proj', '4.mlp.experts.15.gate_proj', '4.mlp.experts.15.up_proj', '4.mlp.experts.15.down_proj', '4.mlp.experts.30.gate_proj', '4.mlp.experts.30.up_proj', '4.mlp.experts.30.down_proj', '5.mlp.experts.3.gate_proj', '5.mlp.experts.3.up_proj', '5.mlp.experts.3.down_proj', '5.mlp.experts.49.gate_proj', '5.mlp.experts.49.up_proj', '5.mlp.experts.49.down_proj', '5.mlp.experts.46.gate_proj', '5.mlp.experts.46.up_proj', '5.mlp.experts.46.down_proj', '5.mlp.experts.44.gate_proj', '5.mlp.experts.44.up_proj', '5.mlp.experts.44.down_proj', '5.mlp.experts.63.gate_proj', '5.mlp.experts.63.up_proj', '5.mlp.experts.63.down_proj', 
'5.mlp.experts.28.gate_proj', '5.mlp.experts.28.up_proj', '5.mlp.experts.28.down_proj', '5.mlp.experts.25.gate_proj', '5.mlp.experts.25.up_proj', '5.mlp.experts.25.down_proj', '5.mlp.experts.29.gate_proj', '5.mlp.experts.29.up_proj', '5.mlp.experts.29.down_proj', '6.mlp.experts.60.gate_proj', '6.mlp.experts.60.up_proj', '6.mlp.experts.60.down_proj', '6.mlp.experts.41.gate_proj', '6.mlp.experts.41.up_proj', '6.mlp.experts.41.down_proj', '6.mlp.experts.28.gate_proj', '6.mlp.experts.28.up_proj', '6.mlp.experts.28.down_proj', '6.mlp.experts.11.gate_proj', '6.mlp.experts.11.up_proj', '6.mlp.experts.11.down_proj', '6.mlp.experts.15.gate_proj', '6.mlp.experts.15.up_proj', '6.mlp.experts.15.down_proj', '6.mlp.experts.14.gate_proj', '6.mlp.experts.14.up_proj', '6.mlp.experts.14.down_proj', '6.mlp.experts.46.gate_proj', '6.mlp.experts.46.up_proj', '6.mlp.experts.46.down_proj', '6.mlp.experts.34.gate_proj', '6.mlp.experts.34.up_proj', '6.mlp.experts.34.down_proj', '7.mlp.experts.51.gate_proj', '7.mlp.experts.51.up_proj', '7.mlp.experts.51.down_proj', '7.mlp.experts.43.gate_proj', '7.mlp.experts.43.up_proj', '7.mlp.experts.43.down_proj', '7.mlp.experts.50.gate_proj', '7.mlp.experts.50.up_proj', '7.mlp.experts.50.down_proj', '7.mlp.experts.6.gate_proj', '7.mlp.experts.6.up_proj', '7.mlp.experts.6.down_proj', '7.mlp.experts.11.gate_proj', '7.mlp.experts.11.up_proj', '7.mlp.experts.11.down_proj', '7.mlp.experts.8.gate_proj', '7.mlp.experts.8.up_proj', '7.mlp.experts.8.down_proj', '7.mlp.experts.59.gate_proj', '7.mlp.experts.59.up_proj', '7.mlp.experts.59.down_proj', '7.mlp.experts.7.gate_proj', '7.mlp.experts.7.up_proj', '7.mlp.experts.7.down_proj', '8.mlp.experts.46.gate_proj', '8.mlp.experts.46.up_proj', '8.mlp.experts.46.down_proj', '8.mlp.experts.19.gate_proj', '8.mlp.experts.19.up_proj', '8.mlp.experts.19.down_proj', '8.mlp.experts.4.gate_proj', '8.mlp.experts.4.up_proj', '8.mlp.experts.4.down_proj', '8.mlp.experts.49.gate_proj', '8.mlp.experts.49.up_proj', 
'8.mlp.experts.49.down_proj', '8.mlp.experts.5.gate_proj', '8.mlp.experts.5.up_proj', '8.mlp.experts.5.down_proj', '8.mlp.experts.39.gate_proj', '8.mlp.experts.39.up_proj', '8.mlp.experts.39.down_proj', '8.mlp.experts.8.gate_proj', '8.mlp.experts.8.up_proj', '8.mlp.experts.8.down_proj', '8.mlp.experts.43.gate_proj', '8.mlp.experts.43.up_proj', '8.mlp.experts.43.down_proj', '9.mlp.experts.19.gate_proj', '9.mlp.experts.19.up_proj', '9.mlp.experts.19.down_proj', '9.mlp.experts.25.gate_proj', '9.mlp.experts.25.up_proj', '9.mlp.experts.25.down_proj', '9.mlp.experts.57.gate_proj', '9.mlp.experts.57.up_proj', '9.mlp.experts.57.down_proj', '9.mlp.experts.56.gate_proj', '9.mlp.experts.56.up_proj', '9.mlp.experts.56.down_proj', '9.mlp.experts.15.gate_proj', '9.mlp.experts.15.up_proj', '9.mlp.experts.15.down_proj', '9.mlp.experts.3.gate_proj', '9.mlp.experts.3.up_proj', '9.mlp.experts.3.down_proj', '9.mlp.experts.18.gate_proj', '9.mlp.experts.18.up_proj', '9.mlp.experts.18.down_proj', '9.mlp.experts.17.gate_proj', '9.mlp.experts.17.up_proj', '9.mlp.experts.17.down_proj', '10.mlp.experts.25.gate_proj', '10.mlp.experts.25.up_proj', '10.mlp.experts.25.down_proj', '10.mlp.experts.63.gate_proj', '10.mlp.experts.63.up_proj', '10.mlp.experts.63.down_proj', '10.mlp.experts.54.gate_proj', '10.mlp.experts.54.up_proj', '10.mlp.experts.54.down_proj', '10.mlp.experts.57.gate_proj', '10.mlp.experts.57.up_proj', '10.mlp.experts.57.down_proj', '10.mlp.experts.40.gate_proj', '10.mlp.experts.40.up_proj', '10.mlp.experts.40.down_proj', '10.mlp.experts.50.gate_proj', '10.mlp.experts.50.up_proj', '10.mlp.experts.50.down_proj', '10.mlp.experts.9.gate_proj', '10.mlp.experts.9.up_proj', '10.mlp.experts.9.down_proj', '10.mlp.experts.8.gate_proj', '10.mlp.experts.8.up_proj', '10.mlp.experts.8.down_proj', '11.mlp.experts.6.gate_proj', '11.mlp.experts.6.up_proj', '11.mlp.experts.6.down_proj', '11.mlp.experts.25.gate_proj', '11.mlp.experts.25.up_proj', '11.mlp.experts.25.down_proj', 
'11.mlp.experts.40.gate_proj', '11.mlp.experts.40.up_proj', '11.mlp.experts.40.down_proj', '11.mlp.experts.28.gate_proj', '11.mlp.experts.28.up_proj', '11.mlp.experts.28.down_proj', '11.mlp.experts.48.gate_proj', '11.mlp.experts.48.up_proj', '11.mlp.experts.48.down_proj', '11.mlp.experts.9.gate_proj', '11.mlp.experts.9.up_proj', '11.mlp.experts.9.down_proj', '11.mlp.experts.50.gate_proj', '11.mlp.experts.50.up_proj', '11.mlp.experts.50.down_proj', '11.mlp.experts.29.gate_proj', '11.mlp.experts.29.up_proj', '11.mlp.experts.29.down_proj', '12.mlp.experts.37.gate_proj', '12.mlp.experts.37.up_proj', '12.mlp.experts.37.down_proj', '12.mlp.experts.18.gate_proj', '12.mlp.experts.18.up_proj', '12.mlp.experts.18.down_proj', '12.mlp.experts.40.gate_proj', '12.mlp.experts.40.up_proj', '12.mlp.experts.40.down_proj', '12.mlp.experts.3.gate_proj', '12.mlp.experts.3.up_proj', '12.mlp.experts.3.down_proj', '12.mlp.experts.35.gate_proj', '12.mlp.experts.35.up_proj', '12.mlp.experts.35.down_proj', '12.mlp.experts.62.gate_proj', '12.mlp.experts.62.up_proj', '12.mlp.experts.62.down_proj', '12.mlp.experts.56.gate_proj', '12.mlp.experts.56.up_proj', '12.mlp.experts.56.down_proj', '12.mlp.experts.61.gate_proj', '12.mlp.experts.61.up_proj', '12.mlp.experts.61.down_proj', '13.mlp.experts.52.gate_proj', '13.mlp.experts.52.up_proj', '13.mlp.experts.52.down_proj', '13.mlp.experts.14.gate_proj', '13.mlp.experts.14.up_proj', '13.mlp.experts.14.down_proj', '13.mlp.experts.60.gate_proj', '13.mlp.experts.60.up_proj', '13.mlp.experts.60.down_proj', '13.mlp.experts.6.gate_proj', '13.mlp.experts.6.up_proj', '13.mlp.experts.6.down_proj', '13.mlp.experts.56.gate_proj', '13.mlp.experts.56.up_proj', '13.mlp.experts.56.down_proj', '13.mlp.experts.40.gate_proj', '13.mlp.experts.40.up_proj', '13.mlp.experts.40.down_proj', '13.mlp.experts.23.gate_proj', '13.mlp.experts.23.up_proj', '13.mlp.experts.23.down_proj', '13.mlp.experts.21.gate_proj', '13.mlp.experts.21.up_proj', '13.mlp.experts.21.down_proj', 
'14.mlp.experts.55.gate_proj', '14.mlp.experts.55.up_proj', '14.mlp.experts.55.down_proj', '14.mlp.experts.14.gate_proj', '14.mlp.experts.14.up_proj', '14.mlp.experts.14.down_proj', '14.mlp.experts.32.gate_proj', '14.mlp.experts.32.up_proj', '14.mlp.experts.32.down_proj', '14.mlp.experts.33.gate_proj', '14.mlp.experts.33.up_proj', '14.mlp.experts.33.down_proj', '14.mlp.experts.30.gate_proj', '14.mlp.experts.30.up_proj', '14.mlp.experts.30.down_proj', '14.mlp.experts.27.gate_proj', '14.mlp.experts.27.up_proj', '14.mlp.experts.27.down_proj', '14.mlp.experts.28.gate_proj', '14.mlp.experts.28.up_proj', '14.mlp.experts.28.down_proj', '14.mlp.experts.12.gate_proj', '14.mlp.experts.12.up_proj', '14.mlp.experts.12.down_proj', '15.mlp.experts.42.gate_proj', '15.mlp.experts.42.up_proj', '15.mlp.experts.42.down_proj', '15.mlp.experts.49.gate_proj', '15.mlp.experts.49.up_proj', '15.mlp.experts.49.down_proj', '15.mlp.experts.14.gate_proj', '15.mlp.experts.14.up_proj', '15.mlp.experts.14.down_proj', '15.mlp.experts.41.gate_proj', '15.mlp.experts.41.up_proj', '15.mlp.experts.41.down_proj', '15.mlp.experts.16.gate_proj', '15.mlp.experts.16.up_proj', '15.mlp.experts.16.down_proj', '15.mlp.experts.25.gate_proj', '15.mlp.experts.25.up_proj', '15.mlp.experts.25.down_proj', '15.mlp.experts.59.gate_proj', '15.mlp.experts.59.up_proj', '15.mlp.experts.59.down_proj', '15.mlp.experts.61.gate_proj', '15.mlp.experts.61.up_proj', '15.mlp.experts.61.down_proj']
                #target_modules = ['gate','0.mlp.experts.34.gate_proj', '0.mlp.experts.34.up_proj', '0.mlp.experts.34.down_proj', '0.mlp.experts.12.gate_proj', '0.mlp.experts.12.up_proj', '0.mlp.experts.12.down_proj', '0.mlp.experts.37.gate_proj', '0.mlp.experts.37.up_proj', '0.mlp.experts.37.down_proj', '0.mlp.experts.51.gate_proj', '0.mlp.experts.51.up_proj', '0.mlp.experts.51.down_proj', '0.mlp.experts.32.gate_proj', '0.mlp.experts.32.up_proj', '0.mlp.experts.32.down_proj', '0.mlp.experts.62.gate_proj', '0.mlp.experts.62.up_proj', '0.mlp.experts.62.down_proj', '0.mlp.experts.3.gate_proj', '0.mlp.experts.3.up_proj', '0.mlp.experts.3.down_proj', '0.mlp.experts.13.gate_proj', '0.mlp.experts.13.up_proj', '0.mlp.experts.13.down_proj', '1.mlp.experts.28.gate_proj', '1.mlp.experts.28.up_proj', '1.mlp.experts.28.down_proj', '1.mlp.experts.43.gate_proj', '1.mlp.experts.43.up_proj', '1.mlp.experts.43.down_proj', '1.mlp.experts.6.gate_proj', '1.mlp.experts.6.up_proj', '1.mlp.experts.6.down_proj', '1.mlp.experts.33.gate_proj', '1.mlp.experts.33.up_proj', '1.mlp.experts.33.down_proj', '1.mlp.experts.26.gate_proj', '1.mlp.experts.26.up_proj', '1.mlp.experts.26.down_proj', '1.mlp.experts.10.gate_proj', '1.mlp.experts.10.up_proj', '1.mlp.experts.10.down_proj', '1.mlp.experts.38.gate_proj', '1.mlp.experts.38.up_proj', '1.mlp.experts.38.down_proj', '1.mlp.experts.56.gate_proj', '1.mlp.experts.56.up_proj', '1.mlp.experts.56.down_proj', '2.mlp.experts.46.gate_proj', '2.mlp.experts.46.up_proj', '2.mlp.experts.46.down_proj', '2.mlp.experts.37.gate_proj', '2.mlp.experts.37.up_proj', '2.mlp.experts.37.down_proj', '2.mlp.experts.2.gate_proj', '2.mlp.experts.2.up_proj', '2.mlp.experts.2.down_proj', '2.mlp.experts.43.gate_proj', '2.mlp.experts.43.up_proj', '2.mlp.experts.43.down_proj', '2.mlp.experts.42.gate_proj', '2.mlp.experts.42.up_proj', '2.mlp.experts.42.down_proj', '2.mlp.experts.18.gate_proj', '2.mlp.experts.18.up_proj', '2.mlp.experts.18.down_proj', '2.mlp.experts.44.gate_proj', 
'2.mlp.experts.44.up_proj', '2.mlp.experts.44.down_proj', '2.mlp.experts.59.gate_proj', '2.mlp.experts.59.up_proj', '2.mlp.experts.59.down_proj', '3.mlp.experts.53.gate_proj', '3.mlp.experts.53.up_proj', '3.mlp.experts.53.down_proj', '3.mlp.experts.21.gate_proj', '3.mlp.experts.21.up_proj', '3.mlp.experts.21.down_proj', '3.mlp.experts.58.gate_proj', '3.mlp.experts.58.up_proj', '3.mlp.experts.58.down_proj', '3.mlp.experts.25.gate_proj', '3.mlp.experts.25.up_proj', '3.mlp.experts.25.down_proj', '3.mlp.experts.13.gate_proj', '3.mlp.experts.13.up_proj', '3.mlp.experts.13.down_proj', '3.mlp.experts.26.gate_proj', '3.mlp.experts.26.up_proj', '3.mlp.experts.26.down_proj', '3.mlp.experts.8.gate_proj', '3.mlp.experts.8.up_proj', '3.mlp.experts.8.down_proj', '3.mlp.experts.60.gate_proj', '3.mlp.experts.60.up_proj', '3.mlp.experts.60.down_proj', '4.mlp.experts.0.gate_proj', '4.mlp.experts.0.up_proj', '4.mlp.experts.0.down_proj', '4.mlp.experts.3.gate_proj', '4.mlp.experts.3.up_proj', '4.mlp.experts.3.down_proj', '4.mlp.experts.53.gate_proj', '4.mlp.experts.53.up_proj', '4.mlp.experts.53.down_proj', '4.mlp.experts.20.gate_proj', '4.mlp.experts.20.up_proj', '4.mlp.experts.20.down_proj', '4.mlp.experts.12.gate_proj', '4.mlp.experts.12.up_proj', '4.mlp.experts.12.down_proj', '4.mlp.experts.32.gate_proj', '4.mlp.experts.32.up_proj', '4.mlp.experts.32.down_proj', '4.mlp.experts.15.gate_proj', '4.mlp.experts.15.up_proj', '4.mlp.experts.15.down_proj', '4.mlp.experts.30.gate_proj', '4.mlp.experts.30.up_proj', '4.mlp.experts.30.down_proj', '5.mlp.experts.3.gate_proj', '5.mlp.experts.3.up_proj', '5.mlp.experts.3.down_proj', '5.mlp.experts.49.gate_proj', '5.mlp.experts.49.up_proj', '5.mlp.experts.49.down_proj', '5.mlp.experts.46.gate_proj', '5.mlp.experts.46.up_proj', '5.mlp.experts.46.down_proj', '5.mlp.experts.44.gate_proj', '5.mlp.experts.44.up_proj', '5.mlp.experts.44.down_proj', '5.mlp.experts.63.gate_proj', '5.mlp.experts.63.up_proj', '5.mlp.experts.63.down_proj', 
'5.mlp.experts.28.gate_proj', '5.mlp.experts.28.up_proj', '5.mlp.experts.28.down_proj', '5.mlp.experts.25.gate_proj', '5.mlp.experts.25.up_proj', '5.mlp.experts.25.down_proj', '5.mlp.experts.29.gate_proj', '5.mlp.experts.29.up_proj', '5.mlp.experts.29.down_proj', '6.mlp.experts.60.gate_proj', '6.mlp.experts.60.up_proj', '6.mlp.experts.60.down_proj', '6.mlp.experts.41.gate_proj', '6.mlp.experts.41.up_proj', '6.mlp.experts.41.down_proj', '6.mlp.experts.28.gate_proj', '6.mlp.experts.28.up_proj', '6.mlp.experts.28.down_proj', '6.mlp.experts.11.gate_proj', '6.mlp.experts.11.up_proj', '6.mlp.experts.11.down_proj', '6.mlp.experts.15.gate_proj', '6.mlp.experts.15.up_proj', '6.mlp.experts.15.down_proj', '6.mlp.experts.14.gate_proj', '6.mlp.experts.14.up_proj', '6.mlp.experts.14.down_proj', '6.mlp.experts.46.gate_proj', '6.mlp.experts.46.up_proj', '6.mlp.experts.46.down_proj', '6.mlp.experts.34.gate_proj', '6.mlp.experts.34.up_proj', '6.mlp.experts.34.down_proj', '7.mlp.experts.51.gate_proj', '7.mlp.experts.51.up_proj', '7.mlp.experts.51.down_proj', '7.mlp.experts.43.gate_proj', '7.mlp.experts.43.up_proj', '7.mlp.experts.43.down_proj', '7.mlp.experts.50.gate_proj', '7.mlp.experts.50.up_proj', '7.mlp.experts.50.down_proj', '7.mlp.experts.6.gate_proj', '7.mlp.experts.6.up_proj', '7.mlp.experts.6.down_proj', '7.mlp.experts.11.gate_proj', '7.mlp.experts.11.up_proj', '7.mlp.experts.11.down_proj', '7.mlp.experts.8.gate_proj', '7.mlp.experts.8.up_proj', '7.mlp.experts.8.down_proj', '7.mlp.experts.59.gate_proj', '7.mlp.experts.59.up_proj', '7.mlp.experts.59.down_proj', '7.mlp.experts.7.gate_proj', '7.mlp.experts.7.up_proj', '7.mlp.experts.7.down_proj', '8.mlp.experts.46.gate_proj', '8.mlp.experts.46.up_proj', '8.mlp.experts.46.down_proj', '8.mlp.experts.19.gate_proj', '8.mlp.experts.19.up_proj', '8.mlp.experts.19.down_proj', '8.mlp.experts.4.gate_proj', '8.mlp.experts.4.up_proj', '8.mlp.experts.4.down_proj', '8.mlp.experts.49.gate_proj', '8.mlp.experts.49.up_proj', 
'8.mlp.experts.49.down_proj', '8.mlp.experts.5.gate_proj', '8.mlp.experts.5.up_proj', '8.mlp.experts.5.down_proj', '8.mlp.experts.39.gate_proj', '8.mlp.experts.39.up_proj', '8.mlp.experts.39.down_proj', '8.mlp.experts.8.gate_proj', '8.mlp.experts.8.up_proj', '8.mlp.experts.8.down_proj', '8.mlp.experts.43.gate_proj', '8.mlp.experts.43.up_proj', '8.mlp.experts.43.down_proj', '9.mlp.experts.19.gate_proj', '9.mlp.experts.19.up_proj', '9.mlp.experts.19.down_proj', '9.mlp.experts.25.gate_proj', '9.mlp.experts.25.up_proj', '9.mlp.experts.25.down_proj', '9.mlp.experts.57.gate_proj', '9.mlp.experts.57.up_proj', '9.mlp.experts.57.down_proj', '9.mlp.experts.56.gate_proj', '9.mlp.experts.56.up_proj', '9.mlp.experts.56.down_proj', '9.mlp.experts.15.gate_proj', '9.mlp.experts.15.up_proj', '9.mlp.experts.15.down_proj', '9.mlp.experts.3.gate_proj', '9.mlp.experts.3.up_proj', '9.mlp.experts.3.down_proj', '9.mlp.experts.18.gate_proj', '9.mlp.experts.18.up_proj', '9.mlp.experts.18.down_proj', '9.mlp.experts.17.gate_proj', '9.mlp.experts.17.up_proj', '9.mlp.experts.17.down_proj', '10.mlp.experts.25.gate_proj', '10.mlp.experts.25.up_proj', '10.mlp.experts.25.down_proj', '10.mlp.experts.63.gate_proj', '10.mlp.experts.63.up_proj', '10.mlp.experts.63.down_proj', '10.mlp.experts.54.gate_proj', '10.mlp.experts.54.up_proj', '10.mlp.experts.54.down_proj', '10.mlp.experts.57.gate_proj', '10.mlp.experts.57.up_proj', '10.mlp.experts.57.down_proj', '10.mlp.experts.40.gate_proj', '10.mlp.experts.40.up_proj', '10.mlp.experts.40.down_proj', '10.mlp.experts.50.gate_proj', '10.mlp.experts.50.up_proj', '10.mlp.experts.50.down_proj', '10.mlp.experts.9.gate_proj', '10.mlp.experts.9.up_proj', '10.mlp.experts.9.down_proj', '10.mlp.experts.8.gate_proj', '10.mlp.experts.8.up_proj', '10.mlp.experts.8.down_proj', '11.mlp.experts.6.gate_proj', '11.mlp.experts.6.up_proj', '11.mlp.experts.6.down_proj', '11.mlp.experts.25.gate_proj', '11.mlp.experts.25.up_proj', '11.mlp.experts.25.down_proj', 
'11.mlp.experts.40.gate_proj', '11.mlp.experts.40.up_proj', '11.mlp.experts.40.down_proj', '11.mlp.experts.28.gate_proj', '11.mlp.experts.28.up_proj', '11.mlp.experts.28.down_proj', '11.mlp.experts.48.gate_proj', '11.mlp.experts.48.up_proj', '11.mlp.experts.48.down_proj', '11.mlp.experts.9.gate_proj', '11.mlp.experts.9.up_proj', '11.mlp.experts.9.down_proj', '11.mlp.experts.50.gate_proj', '11.mlp.experts.50.up_proj', '11.mlp.experts.50.down_proj', '11.mlp.experts.29.gate_proj', '11.mlp.experts.29.up_proj', '11.mlp.experts.29.down_proj', '12.mlp.experts.37.gate_proj', '12.mlp.experts.37.up_proj', '12.mlp.experts.37.down_proj', '12.mlp.experts.18.gate_proj', '12.mlp.experts.18.up_proj', '12.mlp.experts.18.down_proj', '12.mlp.experts.40.gate_proj', '12.mlp.experts.40.up_proj', '12.mlp.experts.40.down_proj', '12.mlp.experts.3.gate_proj', '12.mlp.experts.3.up_proj', '12.mlp.experts.3.down_proj', '12.mlp.experts.35.gate_proj', '12.mlp.experts.35.up_proj', '12.mlp.experts.35.down_proj', '12.mlp.experts.62.gate_proj', '12.mlp.experts.62.up_proj', '12.mlp.experts.62.down_proj', '12.mlp.experts.56.gate_proj', '12.mlp.experts.56.up_proj', '12.mlp.experts.56.down_proj', '12.mlp.experts.61.gate_proj', '12.mlp.experts.61.up_proj', '12.mlp.experts.61.down_proj', '13.mlp.experts.52.gate_proj', '13.mlp.experts.52.up_proj', '13.mlp.experts.52.down_proj', '13.mlp.experts.14.gate_proj', '13.mlp.experts.14.up_proj', '13.mlp.experts.14.down_proj', '13.mlp.experts.60.gate_proj', '13.mlp.experts.60.up_proj', '13.mlp.experts.60.down_proj', '13.mlp.experts.6.gate_proj', '13.mlp.experts.6.up_proj', '13.mlp.experts.6.down_proj', '13.mlp.experts.56.gate_proj', '13.mlp.experts.56.up_proj', '13.mlp.experts.56.down_proj', '13.mlp.experts.40.gate_proj', '13.mlp.experts.40.up_proj', '13.mlp.experts.40.down_proj', '13.mlp.experts.23.gate_proj', '13.mlp.experts.23.up_proj', '13.mlp.experts.23.down_proj', '13.mlp.experts.21.gate_proj', '13.mlp.experts.21.up_proj', '13.mlp.experts.21.down_proj', 
'14.mlp.experts.55.gate_proj', '14.mlp.experts.55.up_proj', '14.mlp.experts.55.down_proj', '14.mlp.experts.14.gate_proj', '14.mlp.experts.14.up_proj', '14.mlp.experts.14.down_proj', '14.mlp.experts.32.gate_proj', '14.mlp.experts.32.up_proj', '14.mlp.experts.32.down_proj', '14.mlp.experts.33.gate_proj', '14.mlp.experts.33.up_proj', '14.mlp.experts.33.down_proj', '14.mlp.experts.30.gate_proj', '14.mlp.experts.30.up_proj', '14.mlp.experts.30.down_proj', '14.mlp.experts.27.gate_proj', '14.mlp.experts.27.up_proj', '14.mlp.experts.27.down_proj', '14.mlp.experts.28.gate_proj', '14.mlp.experts.28.up_proj', '14.mlp.experts.28.down_proj', '14.mlp.experts.12.gate_proj', '14.mlp.experts.12.up_proj', '14.mlp.experts.12.down_proj', '15.mlp.experts.42.gate_proj', '15.mlp.experts.42.up_proj', '15.mlp.experts.42.down_proj', '15.mlp.experts.49.gate_proj', '15.mlp.experts.49.up_proj', '15.mlp.experts.49.down_proj', '15.mlp.experts.14.gate_proj', '15.mlp.experts.14.up_proj', '15.mlp.experts.14.down_proj', '15.mlp.experts.41.gate_proj', '15.mlp.experts.41.up_proj', '15.mlp.experts.41.down_proj', '15.mlp.experts.16.gate_proj', '15.mlp.experts.16.up_proj', '15.mlp.experts.16.down_proj', '15.mlp.experts.25.gate_proj', '15.mlp.experts.25.up_proj', '15.mlp.experts.25.down_proj', '15.mlp.experts.59.gate_proj', '15.mlp.experts.59.up_proj', '15.mlp.experts.59.down_proj', '15.mlp.experts.61.gate_proj', '15.mlp.experts.61.up_proj', '15.mlp.experts.61.down_proj']
                #target_modules = ['q_proj','k_proj','v_proj','o_proj','gate','0.mlp.experts.34.gate_proj', '0.mlp.experts.34.up_proj', '0.mlp.experts.34.down_proj', '0.mlp.experts.12.gate_proj', '0.mlp.experts.12.up_proj', '0.mlp.experts.12.down_proj', '0.mlp.experts.37.gate_proj', '0.mlp.experts.37.up_proj', '0.mlp.experts.37.down_proj', '0.mlp.experts.51.gate_proj', '0.mlp.experts.51.up_proj', '0.mlp.experts.51.down_proj', '0.mlp.experts.32.gate_proj', '0.mlp.experts.32.up_proj', '0.mlp.experts.32.down_proj', '0.mlp.experts.62.gate_proj', '0.mlp.experts.62.up_proj', '0.mlp.experts.62.down_proj', '0.mlp.experts.3.gate_proj', '0.mlp.experts.3.up_proj', '0.mlp.experts.3.down_proj', '0.mlp.experts.13.gate_proj', '0.mlp.experts.13.up_proj', '0.mlp.experts.13.down_proj', '1.mlp.experts.28.gate_proj', '1.mlp.experts.28.up_proj', '1.mlp.experts.28.down_proj', '1.mlp.experts.43.gate_proj', '1.mlp.experts.43.up_proj', '1.mlp.experts.43.down_proj', '1.mlp.experts.6.gate_proj', '1.mlp.experts.6.up_proj', '1.mlp.experts.6.down_proj', '1.mlp.experts.33.gate_proj', '1.mlp.experts.33.up_proj', '1.mlp.experts.33.down_proj', '1.mlp.experts.26.gate_proj', '1.mlp.experts.26.up_proj', '1.mlp.experts.26.down_proj', '1.mlp.experts.10.gate_proj', '1.mlp.experts.10.up_proj', '1.mlp.experts.10.down_proj', '1.mlp.experts.38.gate_proj', '1.mlp.experts.38.up_proj', '1.mlp.experts.38.down_proj', '1.mlp.experts.56.gate_proj', '1.mlp.experts.56.up_proj', '1.mlp.experts.56.down_proj', '2.mlp.experts.46.gate_proj', '2.mlp.experts.46.up_proj', '2.mlp.experts.46.down_proj', '2.mlp.experts.37.gate_proj', '2.mlp.experts.37.up_proj', '2.mlp.experts.37.down_proj', '2.mlp.experts.2.gate_proj', '2.mlp.experts.2.up_proj', '2.mlp.experts.2.down_proj', '2.mlp.experts.43.gate_proj', '2.mlp.experts.43.up_proj', '2.mlp.experts.43.down_proj', '2.mlp.experts.42.gate_proj', '2.mlp.experts.42.up_proj', '2.mlp.experts.42.down_proj', '2.mlp.experts.18.gate_proj', '2.mlp.experts.18.up_proj', 
'2.mlp.experts.18.down_proj', '2.mlp.experts.44.gate_proj', '2.mlp.experts.44.up_proj', '2.mlp.experts.44.down_proj', '2.mlp.experts.59.gate_proj', '2.mlp.experts.59.up_proj', '2.mlp.experts.59.down_proj', '3.mlp.experts.53.gate_proj', '3.mlp.experts.53.up_proj', '3.mlp.experts.53.down_proj', '3.mlp.experts.21.gate_proj', '3.mlp.experts.21.up_proj', '3.mlp.experts.21.down_proj', '3.mlp.experts.58.gate_proj', '3.mlp.experts.58.up_proj', '3.mlp.experts.58.down_proj', '3.mlp.experts.25.gate_proj', '3.mlp.experts.25.up_proj', '3.mlp.experts.25.down_proj', '3.mlp.experts.13.gate_proj', '3.mlp.experts.13.up_proj', '3.mlp.experts.13.down_proj', '3.mlp.experts.26.gate_proj', '3.mlp.experts.26.up_proj', '3.mlp.experts.26.down_proj', '3.mlp.experts.8.gate_proj', '3.mlp.experts.8.up_proj', '3.mlp.experts.8.down_proj', '3.mlp.experts.60.gate_proj', '3.mlp.experts.60.up_proj', '3.mlp.experts.60.down_proj', '4.mlp.experts.0.gate_proj', '4.mlp.experts.0.up_proj', '4.mlp.experts.0.down_proj', '4.mlp.experts.3.gate_proj', '4.mlp.experts.3.up_proj', '4.mlp.experts.3.down_proj', '4.mlp.experts.53.gate_proj', '4.mlp.experts.53.up_proj', '4.mlp.experts.53.down_proj', '4.mlp.experts.20.gate_proj', '4.mlp.experts.20.up_proj', '4.mlp.experts.20.down_proj', '4.mlp.experts.12.gate_proj', '4.mlp.experts.12.up_proj', '4.mlp.experts.12.down_proj', '4.mlp.experts.32.gate_proj', '4.mlp.experts.32.up_proj', '4.mlp.experts.32.down_proj', '4.mlp.experts.15.gate_proj', '4.mlp.experts.15.up_proj', '4.mlp.experts.15.down_proj', '4.mlp.experts.30.gate_proj', '4.mlp.experts.30.up_proj', '4.mlp.experts.30.down_proj', '5.mlp.experts.3.gate_proj', '5.mlp.experts.3.up_proj', '5.mlp.experts.3.down_proj', '5.mlp.experts.49.gate_proj', '5.mlp.experts.49.up_proj', '5.mlp.experts.49.down_proj', '5.mlp.experts.46.gate_proj', '5.mlp.experts.46.up_proj', '5.mlp.experts.46.down_proj', '5.mlp.experts.44.gate_proj', '5.mlp.experts.44.up_proj', '5.mlp.experts.44.down_proj', '5.mlp.experts.63.gate_proj', 
'5.mlp.experts.63.up_proj', '5.mlp.experts.63.down_proj', '5.mlp.experts.28.gate_proj', '5.mlp.experts.28.up_proj', '5.mlp.experts.28.down_proj', '5.mlp.experts.25.gate_proj', '5.mlp.experts.25.up_proj', '5.mlp.experts.25.down_proj', '5.mlp.experts.29.gate_proj', '5.mlp.experts.29.up_proj', '5.mlp.experts.29.down_proj', '6.mlp.experts.60.gate_proj', '6.mlp.experts.60.up_proj', '6.mlp.experts.60.down_proj', '6.mlp.experts.41.gate_proj', '6.mlp.experts.41.up_proj', '6.mlp.experts.41.down_proj', '6.mlp.experts.28.gate_proj', '6.mlp.experts.28.up_proj', '6.mlp.experts.28.down_proj', '6.mlp.experts.11.gate_proj', '6.mlp.experts.11.up_proj', '6.mlp.experts.11.down_proj', '6.mlp.experts.15.gate_proj', '6.mlp.experts.15.up_proj', '6.mlp.experts.15.down_proj', '6.mlp.experts.14.gate_proj', '6.mlp.experts.14.up_proj', '6.mlp.experts.14.down_proj', '6.mlp.experts.46.gate_proj', '6.mlp.experts.46.up_proj', '6.mlp.experts.46.down_proj', '6.mlp.experts.34.gate_proj', '6.mlp.experts.34.up_proj', '6.mlp.experts.34.down_proj', '7.mlp.experts.51.gate_proj', '7.mlp.experts.51.up_proj', '7.mlp.experts.51.down_proj', '7.mlp.experts.43.gate_proj', '7.mlp.experts.43.up_proj', '7.mlp.experts.43.down_proj', '7.mlp.experts.50.gate_proj', '7.mlp.experts.50.up_proj', '7.mlp.experts.50.down_proj', '7.mlp.experts.6.gate_proj', '7.mlp.experts.6.up_proj', '7.mlp.experts.6.down_proj', '7.mlp.experts.11.gate_proj', '7.mlp.experts.11.up_proj', '7.mlp.experts.11.down_proj', '7.mlp.experts.8.gate_proj', '7.mlp.experts.8.up_proj', '7.mlp.experts.8.down_proj', '7.mlp.experts.59.gate_proj', '7.mlp.experts.59.up_proj', '7.mlp.experts.59.down_proj', '7.mlp.experts.7.gate_proj', '7.mlp.experts.7.up_proj', '7.mlp.experts.7.down_proj', '8.mlp.experts.46.gate_proj', '8.mlp.experts.46.up_proj', '8.mlp.experts.46.down_proj', '8.mlp.experts.19.gate_proj', '8.mlp.experts.19.up_proj', '8.mlp.experts.19.down_proj', '8.mlp.experts.4.gate_proj', '8.mlp.experts.4.up_proj', '8.mlp.experts.4.down_proj', 
'8.mlp.experts.49.gate_proj', '8.mlp.experts.49.up_proj', '8.mlp.experts.49.down_proj', '8.mlp.experts.5.gate_proj', '8.mlp.experts.5.up_proj', '8.mlp.experts.5.down_proj', '8.mlp.experts.39.gate_proj', '8.mlp.experts.39.up_proj', '8.mlp.experts.39.down_proj', '8.mlp.experts.8.gate_proj', '8.mlp.experts.8.up_proj', '8.mlp.experts.8.down_proj', '8.mlp.experts.43.gate_proj', '8.mlp.experts.43.up_proj', '8.mlp.experts.43.down_proj', '9.mlp.experts.19.gate_proj', '9.mlp.experts.19.up_proj', '9.mlp.experts.19.down_proj', '9.mlp.experts.25.gate_proj', '9.mlp.experts.25.up_proj', '9.mlp.experts.25.down_proj', '9.mlp.experts.57.gate_proj', '9.mlp.experts.57.up_proj', '9.mlp.experts.57.down_proj', '9.mlp.experts.56.gate_proj', '9.mlp.experts.56.up_proj', '9.mlp.experts.56.down_proj', '9.mlp.experts.15.gate_proj', '9.mlp.experts.15.up_proj', '9.mlp.experts.15.down_proj', '9.mlp.experts.3.gate_proj', '9.mlp.experts.3.up_proj', '9.mlp.experts.3.down_proj', '9.mlp.experts.18.gate_proj', '9.mlp.experts.18.up_proj', '9.mlp.experts.18.down_proj', '9.mlp.experts.17.gate_proj', '9.mlp.experts.17.up_proj', '9.mlp.experts.17.down_proj', '10.mlp.experts.25.gate_proj', '10.mlp.experts.25.up_proj', '10.mlp.experts.25.down_proj', '10.mlp.experts.63.gate_proj', '10.mlp.experts.63.up_proj', '10.mlp.experts.63.down_proj', '10.mlp.experts.54.gate_proj', '10.mlp.experts.54.up_proj', '10.mlp.experts.54.down_proj', '10.mlp.experts.57.gate_proj', '10.mlp.experts.57.up_proj', '10.mlp.experts.57.down_proj', '10.mlp.experts.40.gate_proj', '10.mlp.experts.40.up_proj', '10.mlp.experts.40.down_proj', '10.mlp.experts.50.gate_proj', '10.mlp.experts.50.up_proj', '10.mlp.experts.50.down_proj', '10.mlp.experts.9.gate_proj', '10.mlp.experts.9.up_proj', '10.mlp.experts.9.down_proj', '10.mlp.experts.8.gate_proj', '10.mlp.experts.8.up_proj', '10.mlp.experts.8.down_proj', '11.mlp.experts.6.gate_proj', '11.mlp.experts.6.up_proj', '11.mlp.experts.6.down_proj', '11.mlp.experts.25.gate_proj', 
'11.mlp.experts.25.up_proj', '11.mlp.experts.25.down_proj', '11.mlp.experts.40.gate_proj', '11.mlp.experts.40.up_proj', '11.mlp.experts.40.down_proj', '11.mlp.experts.28.gate_proj', '11.mlp.experts.28.up_proj', '11.mlp.experts.28.down_proj', '11.mlp.experts.48.gate_proj', '11.mlp.experts.48.up_proj', '11.mlp.experts.48.down_proj', '11.mlp.experts.9.gate_proj', '11.mlp.experts.9.up_proj', '11.mlp.experts.9.down_proj', '11.mlp.experts.50.gate_proj', '11.mlp.experts.50.up_proj', '11.mlp.experts.50.down_proj', '11.mlp.experts.29.gate_proj', '11.mlp.experts.29.up_proj', '11.mlp.experts.29.down_proj', '12.mlp.experts.37.gate_proj', '12.mlp.experts.37.up_proj', '12.mlp.experts.37.down_proj', '12.mlp.experts.18.gate_proj', '12.mlp.experts.18.up_proj', '12.mlp.experts.18.down_proj', '12.mlp.experts.40.gate_proj', '12.mlp.experts.40.up_proj', '12.mlp.experts.40.down_proj', '12.mlp.experts.3.gate_proj', '12.mlp.experts.3.up_proj', '12.mlp.experts.3.down_proj', '12.mlp.experts.35.gate_proj', '12.mlp.experts.35.up_proj', '12.mlp.experts.35.down_proj', '12.mlp.experts.62.gate_proj', '12.mlp.experts.62.up_proj', '12.mlp.experts.62.down_proj', '12.mlp.experts.56.gate_proj', '12.mlp.experts.56.up_proj', '12.mlp.experts.56.down_proj', '12.mlp.experts.61.gate_proj', '12.mlp.experts.61.up_proj', '12.mlp.experts.61.down_proj', '13.mlp.experts.52.gate_proj', '13.mlp.experts.52.up_proj', '13.mlp.experts.52.down_proj', '13.mlp.experts.14.gate_proj', '13.mlp.experts.14.up_proj', '13.mlp.experts.14.down_proj', '13.mlp.experts.60.gate_proj', '13.mlp.experts.60.up_proj', '13.mlp.experts.60.down_proj', '13.mlp.experts.6.gate_proj', '13.mlp.experts.6.up_proj', '13.mlp.experts.6.down_proj', '13.mlp.experts.56.gate_proj', '13.mlp.experts.56.up_proj', '13.mlp.experts.56.down_proj', '13.mlp.experts.40.gate_proj', '13.mlp.experts.40.up_proj', '13.mlp.experts.40.down_proj', '13.mlp.experts.23.gate_proj', '13.mlp.experts.23.up_proj', '13.mlp.experts.23.down_proj', '13.mlp.experts.21.gate_proj', 
'13.mlp.experts.21.up_proj', '13.mlp.experts.21.down_proj', '14.mlp.experts.55.gate_proj', '14.mlp.experts.55.up_proj', '14.mlp.experts.55.down_proj', '14.mlp.experts.14.gate_proj', '14.mlp.experts.14.up_proj', '14.mlp.experts.14.down_proj', '14.mlp.experts.32.gate_proj', '14.mlp.experts.32.up_proj', '14.mlp.experts.32.down_proj', '14.mlp.experts.33.gate_proj', '14.mlp.experts.33.up_proj', '14.mlp.experts.33.down_proj', '14.mlp.experts.30.gate_proj', '14.mlp.experts.30.up_proj', '14.mlp.experts.30.down_proj', '14.mlp.experts.27.gate_proj', '14.mlp.experts.27.up_proj', '14.mlp.experts.27.down_proj', '14.mlp.experts.28.gate_proj', '14.mlp.experts.28.up_proj', '14.mlp.experts.28.down_proj', '14.mlp.experts.12.gate_proj', '14.mlp.experts.12.up_proj', '14.mlp.experts.12.down_proj', '15.mlp.experts.42.gate_proj', '15.mlp.experts.42.up_proj', '15.mlp.experts.42.down_proj', '15.mlp.experts.49.gate_proj', '15.mlp.experts.49.up_proj', '15.mlp.experts.49.down_proj', '15.mlp.experts.14.gate_proj', '15.mlp.experts.14.up_proj', '15.mlp.experts.14.down_proj', '15.mlp.experts.41.gate_proj', '15.mlp.experts.41.up_proj', '15.mlp.experts.41.down_proj', '15.mlp.experts.16.gate_proj', '15.mlp.experts.16.up_proj', '15.mlp.experts.16.down_proj', '15.mlp.experts.25.gate_proj', '15.mlp.experts.25.up_proj', '15.mlp.experts.25.down_proj', '15.mlp.experts.59.gate_proj', '15.mlp.experts.59.up_proj', '15.mlp.experts.59.down_proj', '15.mlp.experts.61.gate_proj', '15.mlp.experts.61.up_proj', '15.mlp.experts.61.down_proj']
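                # target_modules for code (commented-out variants below: expert projections only; 'gate' + expert projections; 'q_proj','k_proj','v_proj','o_proj' + 'gate' + expert projections)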
                #target_modules = ['0.mlp.experts.21.gate_proj', '0.mlp.experts.21.up_proj', '0.mlp.experts.21.down_proj', '0.mlp.experts.6.gate_proj', '0.mlp.experts.6.up_proj', '0.mlp.experts.6.down_proj', '0.mlp.experts.24.gate_proj', '0.mlp.experts.24.up_proj', '0.mlp.experts.24.down_proj', '0.mlp.experts.8.gate_proj', '0.mlp.experts.8.up_proj', '0.mlp.experts.8.down_proj', '0.mlp.experts.63.gate_proj', '0.mlp.experts.63.up_proj', '0.mlp.experts.63.down_proj', '0.mlp.experts.47.gate_proj', '0.mlp.experts.47.up_proj', '0.mlp.experts.47.down_proj', '0.mlp.experts.59.gate_proj', '0.mlp.experts.59.up_proj', '0.mlp.experts.59.down_proj', '0.mlp.experts.53.gate_proj', '0.mlp.experts.53.up_proj', '0.mlp.experts.53.down_proj', '1.mlp.experts.25.gate_proj', '1.mlp.experts.25.up_proj', '1.mlp.experts.25.down_proj', '1.mlp.experts.15.gate_proj', '1.mlp.experts.15.up_proj', '1.mlp.experts.15.down_proj', '1.mlp.experts.47.gate_proj', '1.mlp.experts.47.up_proj', '1.mlp.experts.47.down_proj', '1.mlp.experts.18.gate_proj', '1.mlp.experts.18.up_proj', '1.mlp.experts.18.down_proj', '1.mlp.experts.11.gate_proj', '1.mlp.experts.11.up_proj', '1.mlp.experts.11.down_proj', '1.mlp.experts.60.gate_proj', '1.mlp.experts.60.up_proj', '1.mlp.experts.60.down_proj', '1.mlp.experts.7.gate_proj', '1.mlp.experts.7.up_proj', '1.mlp.experts.7.down_proj', '1.mlp.experts.17.gate_proj', '1.mlp.experts.17.up_proj', '1.mlp.experts.17.down_proj', '2.mlp.experts.40.gate_proj', '2.mlp.experts.40.up_proj', '2.mlp.experts.40.down_proj', '2.mlp.experts.5.gate_proj', '2.mlp.experts.5.up_proj', '2.mlp.experts.5.down_proj', '2.mlp.experts.60.gate_proj', '2.mlp.experts.60.up_proj', '2.mlp.experts.60.down_proj', '2.mlp.experts.28.gate_proj', '2.mlp.experts.28.up_proj', '2.mlp.experts.28.down_proj', '2.mlp.experts.9.gate_proj', '2.mlp.experts.9.up_proj', '2.mlp.experts.9.down_proj', '2.mlp.experts.61.gate_proj', '2.mlp.experts.61.up_proj', '2.mlp.experts.61.down_proj', '2.mlp.experts.10.gate_proj', 
'2.mlp.experts.10.up_proj', '2.mlp.experts.10.down_proj', '2.mlp.experts.45.gate_proj', '2.mlp.experts.45.up_proj', '2.mlp.experts.45.down_proj', '3.mlp.experts.15.gate_proj', '3.mlp.experts.15.up_proj', '3.mlp.experts.15.down_proj', '3.mlp.experts.43.gate_proj', '3.mlp.experts.43.up_proj', '3.mlp.experts.43.down_proj', '3.mlp.experts.62.gate_proj', '3.mlp.experts.62.up_proj', '3.mlp.experts.62.down_proj', '3.mlp.experts.39.gate_proj', '3.mlp.experts.39.up_proj', '3.mlp.experts.39.down_proj', '3.mlp.experts.35.gate_proj', '3.mlp.experts.35.up_proj', '3.mlp.experts.35.down_proj', '3.mlp.experts.56.gate_proj', '3.mlp.experts.56.up_proj', '3.mlp.experts.56.down_proj', '3.mlp.experts.44.gate_proj', '3.mlp.experts.44.up_proj', '3.mlp.experts.44.down_proj', '3.mlp.experts.16.gate_proj', '3.mlp.experts.16.up_proj', '3.mlp.experts.16.down_proj', '4.mlp.experts.21.gate_proj', '4.mlp.experts.21.up_proj', '4.mlp.experts.21.down_proj', '4.mlp.experts.27.gate_proj', '4.mlp.experts.27.up_proj', '4.mlp.experts.27.down_proj', '4.mlp.experts.25.gate_proj', '4.mlp.experts.25.up_proj', '4.mlp.experts.25.down_proj', '4.mlp.experts.17.gate_proj', '4.mlp.experts.17.up_proj', '4.mlp.experts.17.down_proj', '4.mlp.experts.33.gate_proj', '4.mlp.experts.33.up_proj', '4.mlp.experts.33.down_proj', '4.mlp.experts.55.gate_proj', '4.mlp.experts.55.up_proj', '4.mlp.experts.55.down_proj', '4.mlp.experts.19.gate_proj', '4.mlp.experts.19.up_proj', '4.mlp.experts.19.down_proj', '4.mlp.experts.6.gate_proj', '4.mlp.experts.6.up_proj', '4.mlp.experts.6.down_proj', '5.mlp.experts.42.gate_proj', '5.mlp.experts.42.up_proj', '5.mlp.experts.42.down_proj', '5.mlp.experts.2.gate_proj', '5.mlp.experts.2.up_proj', '5.mlp.experts.2.down_proj', '5.mlp.experts.31.gate_proj', '5.mlp.experts.31.up_proj', '5.mlp.experts.31.down_proj', '5.mlp.experts.17.gate_proj', '5.mlp.experts.17.up_proj', '5.mlp.experts.17.down_proj', '5.mlp.experts.0.gate_proj', '5.mlp.experts.0.up_proj', '5.mlp.experts.0.down_proj', 
'5.mlp.experts.55.gate_proj', '5.mlp.experts.55.up_proj', '5.mlp.experts.55.down_proj', '5.mlp.experts.21.gate_proj', '5.mlp.experts.21.up_proj', '5.mlp.experts.21.down_proj', '5.mlp.experts.60.gate_proj', '5.mlp.experts.60.up_proj', '5.mlp.experts.60.down_proj', '6.mlp.experts.52.gate_proj', '6.mlp.experts.52.up_proj', '6.mlp.experts.52.down_proj', '6.mlp.experts.49.gate_proj', '6.mlp.experts.49.up_proj', '6.mlp.experts.49.down_proj', '6.mlp.experts.42.gate_proj', '6.mlp.experts.42.up_proj', '6.mlp.experts.42.down_proj', '6.mlp.experts.59.gate_proj', '6.mlp.experts.59.up_proj', '6.mlp.experts.59.down_proj', '6.mlp.experts.17.gate_proj', '6.mlp.experts.17.up_proj', '6.mlp.experts.17.down_proj', '6.mlp.experts.40.gate_proj', '6.mlp.experts.40.up_proj', '6.mlp.experts.40.down_proj', '6.mlp.experts.36.gate_proj', '6.mlp.experts.36.up_proj', '6.mlp.experts.36.down_proj', '6.mlp.experts.22.gate_proj', '6.mlp.experts.22.up_proj', '6.mlp.experts.22.down_proj', '7.mlp.experts.17.gate_proj', '7.mlp.experts.17.up_proj', '7.mlp.experts.17.down_proj', '7.mlp.experts.35.gate_proj', '7.mlp.experts.35.up_proj', '7.mlp.experts.35.down_proj', '7.mlp.experts.2.gate_proj', '7.mlp.experts.2.up_proj', '7.mlp.experts.2.down_proj', '7.mlp.experts.21.gate_proj', '7.mlp.experts.21.up_proj', '7.mlp.experts.21.down_proj', '7.mlp.experts.45.gate_proj', '7.mlp.experts.45.up_proj', '7.mlp.experts.45.down_proj', '7.mlp.experts.4.gate_proj', '7.mlp.experts.4.up_proj', '7.mlp.experts.4.down_proj', '7.mlp.experts.39.gate_proj', '7.mlp.experts.39.up_proj', '7.mlp.experts.39.down_proj', '7.mlp.experts.28.gate_proj', '7.mlp.experts.28.up_proj', '7.mlp.experts.28.down_proj', '8.mlp.experts.54.gate_proj', '8.mlp.experts.54.up_proj', '8.mlp.experts.54.down_proj', '8.mlp.experts.3.gate_proj', '8.mlp.experts.3.up_proj', '8.mlp.experts.3.down_proj', '8.mlp.experts.32.gate_proj', '8.mlp.experts.32.up_proj', '8.mlp.experts.32.down_proj', '8.mlp.experts.6.gate_proj', '8.mlp.experts.6.up_proj', 
'8.mlp.experts.6.down_proj', '8.mlp.experts.44.gate_proj', '8.mlp.experts.44.up_proj', '8.mlp.experts.44.down_proj', '8.mlp.experts.16.gate_proj', '8.mlp.experts.16.up_proj', '8.mlp.experts.16.down_proj', '8.mlp.experts.61.gate_proj', '8.mlp.experts.61.up_proj', '8.mlp.experts.61.down_proj', '8.mlp.experts.48.gate_proj', '8.mlp.experts.48.up_proj', '8.mlp.experts.48.down_proj', '9.mlp.experts.46.gate_proj', '9.mlp.experts.46.up_proj', '9.mlp.experts.46.down_proj', '9.mlp.experts.51.gate_proj', '9.mlp.experts.51.up_proj', '9.mlp.experts.51.down_proj', '9.mlp.experts.5.gate_proj', '9.mlp.experts.5.up_proj', '9.mlp.experts.5.down_proj', '9.mlp.experts.11.gate_proj', '9.mlp.experts.11.up_proj', '9.mlp.experts.11.down_proj', '9.mlp.experts.39.gate_proj', '9.mlp.experts.39.up_proj', '9.mlp.experts.39.down_proj', '9.mlp.experts.4.gate_proj', '9.mlp.experts.4.up_proj', '9.mlp.experts.4.down_proj', '9.mlp.experts.47.gate_proj', '9.mlp.experts.47.up_proj', '9.mlp.experts.47.down_proj', '9.mlp.experts.33.gate_proj', '9.mlp.experts.33.up_proj', '9.mlp.experts.33.down_proj', '10.mlp.experts.56.gate_proj', '10.mlp.experts.56.up_proj', '10.mlp.experts.56.down_proj', '10.mlp.experts.43.gate_proj', '10.mlp.experts.43.up_proj', '10.mlp.experts.43.down_proj', '10.mlp.experts.29.gate_proj', '10.mlp.experts.29.up_proj', '10.mlp.experts.29.down_proj', '10.mlp.experts.19.gate_proj', '10.mlp.experts.19.up_proj', '10.mlp.experts.19.down_proj', '10.mlp.experts.46.gate_proj', '10.mlp.experts.46.up_proj', '10.mlp.experts.46.down_proj', '10.mlp.experts.41.gate_proj', '10.mlp.experts.41.up_proj', '10.mlp.experts.41.down_proj', '10.mlp.experts.48.gate_proj', '10.mlp.experts.48.up_proj', '10.mlp.experts.48.down_proj', '10.mlp.experts.23.gate_proj', '10.mlp.experts.23.up_proj', '10.mlp.experts.23.down_proj', '11.mlp.experts.27.gate_proj', '11.mlp.experts.27.up_proj', '11.mlp.experts.27.down_proj', '11.mlp.experts.13.gate_proj', '11.mlp.experts.13.up_proj', '11.mlp.experts.13.down_proj', 
'11.mlp.experts.47.gate_proj', '11.mlp.experts.47.up_proj', '11.mlp.experts.47.down_proj', '11.mlp.experts.20.gate_proj', '11.mlp.experts.20.up_proj', '11.mlp.experts.20.down_proj', '11.mlp.experts.54.gate_proj', '11.mlp.experts.54.up_proj', '11.mlp.experts.54.down_proj', '11.mlp.experts.23.gate_proj', '11.mlp.experts.23.up_proj', '11.mlp.experts.23.down_proj', '11.mlp.experts.62.gate_proj', '11.mlp.experts.62.up_proj', '11.mlp.experts.62.down_proj', '11.mlp.experts.15.gate_proj', '11.mlp.experts.15.up_proj', '11.mlp.experts.15.down_proj', '12.mlp.experts.38.gate_proj', '12.mlp.experts.38.up_proj', '12.mlp.experts.38.down_proj', '12.mlp.experts.60.gate_proj', '12.mlp.experts.60.up_proj', '12.mlp.experts.60.down_proj', '12.mlp.experts.8.gate_proj', '12.mlp.experts.8.up_proj', '12.mlp.experts.8.down_proj', '12.mlp.experts.31.gate_proj', '12.mlp.experts.31.up_proj', '12.mlp.experts.31.down_proj', '12.mlp.experts.10.gate_proj', '12.mlp.experts.10.up_proj', '12.mlp.experts.10.down_proj', '12.mlp.experts.43.gate_proj', '12.mlp.experts.43.up_proj', '12.mlp.experts.43.down_proj', '12.mlp.experts.53.gate_proj', '12.mlp.experts.53.up_proj', '12.mlp.experts.53.down_proj', '12.mlp.experts.47.gate_proj', '12.mlp.experts.47.up_proj', '12.mlp.experts.47.down_proj', '13.mlp.experts.2.gate_proj', '13.mlp.experts.2.up_proj', '13.mlp.experts.2.down_proj', '13.mlp.experts.62.gate_proj', '13.mlp.experts.62.up_proj', '13.mlp.experts.62.down_proj', '13.mlp.experts.25.gate_proj', '13.mlp.experts.25.up_proj', '13.mlp.experts.25.down_proj', '13.mlp.experts.6.gate_proj', '13.mlp.experts.6.up_proj', '13.mlp.experts.6.down_proj', '13.mlp.experts.51.gate_proj', '13.mlp.experts.51.up_proj', '13.mlp.experts.51.down_proj', '13.mlp.experts.5.gate_proj', '13.mlp.experts.5.up_proj', '13.mlp.experts.5.down_proj', '13.mlp.experts.28.gate_proj', '13.mlp.experts.28.up_proj', '13.mlp.experts.28.down_proj', '13.mlp.experts.21.gate_proj', '13.mlp.experts.21.up_proj', '13.mlp.experts.21.down_proj', 
'14.mlp.experts.58.gate_proj', '14.mlp.experts.58.up_proj', '14.mlp.experts.58.down_proj', '14.mlp.experts.54.gate_proj', '14.mlp.experts.54.up_proj', '14.mlp.experts.54.down_proj', '14.mlp.experts.6.gate_proj', '14.mlp.experts.6.up_proj', '14.mlp.experts.6.down_proj', '14.mlp.experts.7.gate_proj', '14.mlp.experts.7.up_proj', '14.mlp.experts.7.down_proj', '14.mlp.experts.9.gate_proj', '14.mlp.experts.9.up_proj', '14.mlp.experts.9.down_proj', '14.mlp.experts.36.gate_proj', '14.mlp.experts.36.up_proj', '14.mlp.experts.36.down_proj', '14.mlp.experts.57.gate_proj', '14.mlp.experts.57.up_proj', '14.mlp.experts.57.down_proj', '14.mlp.experts.17.gate_proj', '14.mlp.experts.17.up_proj', '14.mlp.experts.17.down_proj', '15.mlp.experts.8.gate_proj', '15.mlp.experts.8.up_proj', '15.mlp.experts.8.down_proj', '15.mlp.experts.30.gate_proj', '15.mlp.experts.30.up_proj', '15.mlp.experts.30.down_proj', '15.mlp.experts.17.gate_proj', '15.mlp.experts.17.up_proj', '15.mlp.experts.17.down_proj', '15.mlp.experts.44.gate_proj', '15.mlp.experts.44.up_proj', '15.mlp.experts.44.down_proj', '15.mlp.experts.36.gate_proj', '15.mlp.experts.36.up_proj', '15.mlp.experts.36.down_proj', '15.mlp.experts.57.gate_proj', '15.mlp.experts.57.up_proj', '15.mlp.experts.57.down_proj', '15.mlp.experts.24.gate_proj', '15.mlp.experts.24.up_proj', '15.mlp.experts.24.down_proj', '15.mlp.experts.12.gate_proj', '15.mlp.experts.12.up_proj', '15.mlp.experts.12.down_proj']
                #target_modules = ['gate', '0.mlp.experts.21.gate_proj', '0.mlp.experts.21.up_proj', '0.mlp.experts.21.down_proj', '0.mlp.experts.6.gate_proj', '0.mlp.experts.6.up_proj', '0.mlp.experts.6.down_proj', '0.mlp.experts.24.gate_proj', '0.mlp.experts.24.up_proj', '0.mlp.experts.24.down_proj', '0.mlp.experts.8.gate_proj', '0.mlp.experts.8.up_proj', '0.mlp.experts.8.down_proj', '0.mlp.experts.63.gate_proj', '0.mlp.experts.63.up_proj', '0.mlp.experts.63.down_proj', '0.mlp.experts.47.gate_proj', '0.mlp.experts.47.up_proj', '0.mlp.experts.47.down_proj', '0.mlp.experts.59.gate_proj', '0.mlp.experts.59.up_proj', '0.mlp.experts.59.down_proj', '0.mlp.experts.53.gate_proj', '0.mlp.experts.53.up_proj', '0.mlp.experts.53.down_proj', '1.mlp.experts.25.gate_proj', '1.mlp.experts.25.up_proj', '1.mlp.experts.25.down_proj', '1.mlp.experts.15.gate_proj', '1.mlp.experts.15.up_proj', '1.mlp.experts.15.down_proj', '1.mlp.experts.47.gate_proj', '1.mlp.experts.47.up_proj', '1.mlp.experts.47.down_proj', '1.mlp.experts.18.gate_proj', '1.mlp.experts.18.up_proj', '1.mlp.experts.18.down_proj', '1.mlp.experts.11.gate_proj', '1.mlp.experts.11.up_proj', '1.mlp.experts.11.down_proj', '1.mlp.experts.60.gate_proj', '1.mlp.experts.60.up_proj', '1.mlp.experts.60.down_proj', '1.mlp.experts.7.gate_proj', '1.mlp.experts.7.up_proj', '1.mlp.experts.7.down_proj', '1.mlp.experts.17.gate_proj', '1.mlp.experts.17.up_proj', '1.mlp.experts.17.down_proj', '2.mlp.experts.40.gate_proj', '2.mlp.experts.40.up_proj', '2.mlp.experts.40.down_proj', '2.mlp.experts.5.gate_proj', '2.mlp.experts.5.up_proj', '2.mlp.experts.5.down_proj', '2.mlp.experts.60.gate_proj', '2.mlp.experts.60.up_proj', '2.mlp.experts.60.down_proj', '2.mlp.experts.28.gate_proj', '2.mlp.experts.28.up_proj', '2.mlp.experts.28.down_proj', '2.mlp.experts.9.gate_proj', '2.mlp.experts.9.up_proj', '2.mlp.experts.9.down_proj', '2.mlp.experts.61.gate_proj', '2.mlp.experts.61.up_proj', '2.mlp.experts.61.down_proj', '2.mlp.experts.10.gate_proj', 
'2.mlp.experts.10.up_proj', '2.mlp.experts.10.down_proj', '2.mlp.experts.45.gate_proj', '2.mlp.experts.45.up_proj', '2.mlp.experts.45.down_proj', '3.mlp.experts.15.gate_proj', '3.mlp.experts.15.up_proj', '3.mlp.experts.15.down_proj', '3.mlp.experts.43.gate_proj', '3.mlp.experts.43.up_proj', '3.mlp.experts.43.down_proj', '3.mlp.experts.62.gate_proj', '3.mlp.experts.62.up_proj', '3.mlp.experts.62.down_proj', '3.mlp.experts.39.gate_proj', '3.mlp.experts.39.up_proj', '3.mlp.experts.39.down_proj', '3.mlp.experts.35.gate_proj', '3.mlp.experts.35.up_proj', '3.mlp.experts.35.down_proj', '3.mlp.experts.56.gate_proj', '3.mlp.experts.56.up_proj', '3.mlp.experts.56.down_proj', '3.mlp.experts.44.gate_proj', '3.mlp.experts.44.up_proj', '3.mlp.experts.44.down_proj', '3.mlp.experts.16.gate_proj', '3.mlp.experts.16.up_proj', '3.mlp.experts.16.down_proj', '4.mlp.experts.21.gate_proj', '4.mlp.experts.21.up_proj', '4.mlp.experts.21.down_proj', '4.mlp.experts.27.gate_proj', '4.mlp.experts.27.up_proj', '4.mlp.experts.27.down_proj', '4.mlp.experts.25.gate_proj', '4.mlp.experts.25.up_proj', '4.mlp.experts.25.down_proj', '4.mlp.experts.17.gate_proj', '4.mlp.experts.17.up_proj', '4.mlp.experts.17.down_proj', '4.mlp.experts.33.gate_proj', '4.mlp.experts.33.up_proj', '4.mlp.experts.33.down_proj', '4.mlp.experts.55.gate_proj', '4.mlp.experts.55.up_proj', '4.mlp.experts.55.down_proj', '4.mlp.experts.19.gate_proj', '4.mlp.experts.19.up_proj', '4.mlp.experts.19.down_proj', '4.mlp.experts.6.gate_proj', '4.mlp.experts.6.up_proj', '4.mlp.experts.6.down_proj', '5.mlp.experts.42.gate_proj', '5.mlp.experts.42.up_proj', '5.mlp.experts.42.down_proj', '5.mlp.experts.2.gate_proj', '5.mlp.experts.2.up_proj', '5.mlp.experts.2.down_proj', '5.mlp.experts.31.gate_proj', '5.mlp.experts.31.up_proj', '5.mlp.experts.31.down_proj', '5.mlp.experts.17.gate_proj', '5.mlp.experts.17.up_proj', '5.mlp.experts.17.down_proj', '5.mlp.experts.0.gate_proj', '5.mlp.experts.0.up_proj', '5.mlp.experts.0.down_proj', 
'5.mlp.experts.55.gate_proj', '5.mlp.experts.55.up_proj', '5.mlp.experts.55.down_proj', '5.mlp.experts.21.gate_proj', '5.mlp.experts.21.up_proj', '5.mlp.experts.21.down_proj', '5.mlp.experts.60.gate_proj', '5.mlp.experts.60.up_proj', '5.mlp.experts.60.down_proj', '6.mlp.experts.52.gate_proj', '6.mlp.experts.52.up_proj', '6.mlp.experts.52.down_proj', '6.mlp.experts.49.gate_proj', '6.mlp.experts.49.up_proj', '6.mlp.experts.49.down_proj', '6.mlp.experts.42.gate_proj', '6.mlp.experts.42.up_proj', '6.mlp.experts.42.down_proj', '6.mlp.experts.59.gate_proj', '6.mlp.experts.59.up_proj', '6.mlp.experts.59.down_proj', '6.mlp.experts.17.gate_proj', '6.mlp.experts.17.up_proj', '6.mlp.experts.17.down_proj', '6.mlp.experts.40.gate_proj', '6.mlp.experts.40.up_proj', '6.mlp.experts.40.down_proj', '6.mlp.experts.36.gate_proj', '6.mlp.experts.36.up_proj', '6.mlp.experts.36.down_proj', '6.mlp.experts.22.gate_proj', '6.mlp.experts.22.up_proj', '6.mlp.experts.22.down_proj', '7.mlp.experts.17.gate_proj', '7.mlp.experts.17.up_proj', '7.mlp.experts.17.down_proj', '7.mlp.experts.35.gate_proj', '7.mlp.experts.35.up_proj', '7.mlp.experts.35.down_proj', '7.mlp.experts.2.gate_proj', '7.mlp.experts.2.up_proj', '7.mlp.experts.2.down_proj', '7.mlp.experts.21.gate_proj', '7.mlp.experts.21.up_proj', '7.mlp.experts.21.down_proj', '7.mlp.experts.45.gate_proj', '7.mlp.experts.45.up_proj', '7.mlp.experts.45.down_proj', '7.mlp.experts.4.gate_proj', '7.mlp.experts.4.up_proj', '7.mlp.experts.4.down_proj', '7.mlp.experts.39.gate_proj', '7.mlp.experts.39.up_proj', '7.mlp.experts.39.down_proj', '7.mlp.experts.28.gate_proj', '7.mlp.experts.28.up_proj', '7.mlp.experts.28.down_proj', '8.mlp.experts.54.gate_proj', '8.mlp.experts.54.up_proj', '8.mlp.experts.54.down_proj', '8.mlp.experts.3.gate_proj', '8.mlp.experts.3.up_proj', '8.mlp.experts.3.down_proj', '8.mlp.experts.32.gate_proj', '8.mlp.experts.32.up_proj', '8.mlp.experts.32.down_proj', '8.mlp.experts.6.gate_proj', '8.mlp.experts.6.up_proj', 
'8.mlp.experts.6.down_proj', '8.mlp.experts.44.gate_proj', '8.mlp.experts.44.up_proj', '8.mlp.experts.44.down_proj', '8.mlp.experts.16.gate_proj', '8.mlp.experts.16.up_proj', '8.mlp.experts.16.down_proj', '8.mlp.experts.61.gate_proj', '8.mlp.experts.61.up_proj', '8.mlp.experts.61.down_proj', '8.mlp.experts.48.gate_proj', '8.mlp.experts.48.up_proj', '8.mlp.experts.48.down_proj', '9.mlp.experts.46.gate_proj', '9.mlp.experts.46.up_proj', '9.mlp.experts.46.down_proj', '9.mlp.experts.51.gate_proj', '9.mlp.experts.51.up_proj', '9.mlp.experts.51.down_proj', '9.mlp.experts.5.gate_proj', '9.mlp.experts.5.up_proj', '9.mlp.experts.5.down_proj', '9.mlp.experts.11.gate_proj', '9.mlp.experts.11.up_proj', '9.mlp.experts.11.down_proj', '9.mlp.experts.39.gate_proj', '9.mlp.experts.39.up_proj', '9.mlp.experts.39.down_proj', '9.mlp.experts.4.gate_proj', '9.mlp.experts.4.up_proj', '9.mlp.experts.4.down_proj', '9.mlp.experts.47.gate_proj', '9.mlp.experts.47.up_proj', '9.mlp.experts.47.down_proj', '9.mlp.experts.33.gate_proj', '9.mlp.experts.33.up_proj', '9.mlp.experts.33.down_proj', '10.mlp.experts.56.gate_proj', '10.mlp.experts.56.up_proj', '10.mlp.experts.56.down_proj', '10.mlp.experts.43.gate_proj', '10.mlp.experts.43.up_proj', '10.mlp.experts.43.down_proj', '10.mlp.experts.29.gate_proj', '10.mlp.experts.29.up_proj', '10.mlp.experts.29.down_proj', '10.mlp.experts.19.gate_proj', '10.mlp.experts.19.up_proj', '10.mlp.experts.19.down_proj', '10.mlp.experts.46.gate_proj', '10.mlp.experts.46.up_proj', '10.mlp.experts.46.down_proj', '10.mlp.experts.41.gate_proj', '10.mlp.experts.41.up_proj', '10.mlp.experts.41.down_proj', '10.mlp.experts.48.gate_proj', '10.mlp.experts.48.up_proj', '10.mlp.experts.48.down_proj', '10.mlp.experts.23.gate_proj', '10.mlp.experts.23.up_proj', '10.mlp.experts.23.down_proj', '11.mlp.experts.27.gate_proj', '11.mlp.experts.27.up_proj', '11.mlp.experts.27.down_proj', '11.mlp.experts.13.gate_proj', '11.mlp.experts.13.up_proj', '11.mlp.experts.13.down_proj', 
'11.mlp.experts.47.gate_proj', '11.mlp.experts.47.up_proj', '11.mlp.experts.47.down_proj', '11.mlp.experts.20.gate_proj', '11.mlp.experts.20.up_proj', '11.mlp.experts.20.down_proj', '11.mlp.experts.54.gate_proj', '11.mlp.experts.54.up_proj', '11.mlp.experts.54.down_proj', '11.mlp.experts.23.gate_proj', '11.mlp.experts.23.up_proj', '11.mlp.experts.23.down_proj', '11.mlp.experts.62.gate_proj', '11.mlp.experts.62.up_proj', '11.mlp.experts.62.down_proj', '11.mlp.experts.15.gate_proj', '11.mlp.experts.15.up_proj', '11.mlp.experts.15.down_proj', '12.mlp.experts.38.gate_proj', '12.mlp.experts.38.up_proj', '12.mlp.experts.38.down_proj', '12.mlp.experts.60.gate_proj', '12.mlp.experts.60.up_proj', '12.mlp.experts.60.down_proj', '12.mlp.experts.8.gate_proj', '12.mlp.experts.8.up_proj', '12.mlp.experts.8.down_proj', '12.mlp.experts.31.gate_proj', '12.mlp.experts.31.up_proj', '12.mlp.experts.31.down_proj', '12.mlp.experts.10.gate_proj', '12.mlp.experts.10.up_proj', '12.mlp.experts.10.down_proj', '12.mlp.experts.43.gate_proj', '12.mlp.experts.43.up_proj', '12.mlp.experts.43.down_proj', '12.mlp.experts.53.gate_proj', '12.mlp.experts.53.up_proj', '12.mlp.experts.53.down_proj', '12.mlp.experts.47.gate_proj', '12.mlp.experts.47.up_proj', '12.mlp.experts.47.down_proj', '13.mlp.experts.2.gate_proj', '13.mlp.experts.2.up_proj', '13.mlp.experts.2.down_proj', '13.mlp.experts.62.gate_proj', '13.mlp.experts.62.up_proj', '13.mlp.experts.62.down_proj', '13.mlp.experts.25.gate_proj', '13.mlp.experts.25.up_proj', '13.mlp.experts.25.down_proj', '13.mlp.experts.6.gate_proj', '13.mlp.experts.6.up_proj', '13.mlp.experts.6.down_proj', '13.mlp.experts.51.gate_proj', '13.mlp.experts.51.up_proj', '13.mlp.experts.51.down_proj', '13.mlp.experts.5.gate_proj', '13.mlp.experts.5.up_proj', '13.mlp.experts.5.down_proj', '13.mlp.experts.28.gate_proj', '13.mlp.experts.28.up_proj', '13.mlp.experts.28.down_proj', '13.mlp.experts.21.gate_proj', '13.mlp.experts.21.up_proj', '13.mlp.experts.21.down_proj', 
'14.mlp.experts.58.gate_proj', '14.mlp.experts.58.up_proj', '14.mlp.experts.58.down_proj', '14.mlp.experts.54.gate_proj', '14.mlp.experts.54.up_proj', '14.mlp.experts.54.down_proj', '14.mlp.experts.6.gate_proj', '14.mlp.experts.6.up_proj', '14.mlp.experts.6.down_proj', '14.mlp.experts.7.gate_proj', '14.mlp.experts.7.up_proj', '14.mlp.experts.7.down_proj', '14.mlp.experts.9.gate_proj', '14.mlp.experts.9.up_proj', '14.mlp.experts.9.down_proj', '14.mlp.experts.36.gate_proj', '14.mlp.experts.36.up_proj', '14.mlp.experts.36.down_proj', '14.mlp.experts.57.gate_proj', '14.mlp.experts.57.up_proj', '14.mlp.experts.57.down_proj', '14.mlp.experts.17.gate_proj', '14.mlp.experts.17.up_proj', '14.mlp.experts.17.down_proj', '15.mlp.experts.8.gate_proj', '15.mlp.experts.8.up_proj', '15.mlp.experts.8.down_proj', '15.mlp.experts.30.gate_proj', '15.mlp.experts.30.up_proj', '15.mlp.experts.30.down_proj', '15.mlp.experts.17.gate_proj', '15.mlp.experts.17.up_proj', '15.mlp.experts.17.down_proj', '15.mlp.experts.44.gate_proj', '15.mlp.experts.44.up_proj', '15.mlp.experts.44.down_proj', '15.mlp.experts.36.gate_proj', '15.mlp.experts.36.up_proj', '15.mlp.experts.36.down_proj', '15.mlp.experts.57.gate_proj', '15.mlp.experts.57.up_proj', '15.mlp.experts.57.down_proj', '15.mlp.experts.24.gate_proj', '15.mlp.experts.24.up_proj', '15.mlp.experts.24.down_proj', '15.mlp.experts.12.gate_proj', '15.mlp.experts.12.up_proj', '15.mlp.experts.12.down_proj']
                #target_modules = ['q_proj','k_proj','v_proj','o_proj','gate', '0.mlp.experts.21.gate_proj', '0.mlp.experts.21.up_proj', '0.mlp.experts.21.down_proj', '0.mlp.experts.6.gate_proj', '0.mlp.experts.6.up_proj', '0.mlp.experts.6.down_proj', '0.mlp.experts.24.gate_proj', '0.mlp.experts.24.up_proj', '0.mlp.experts.24.down_proj', '0.mlp.experts.8.gate_proj', '0.mlp.experts.8.up_proj', '0.mlp.experts.8.down_proj', '0.mlp.experts.63.gate_proj', '0.mlp.experts.63.up_proj', '0.mlp.experts.63.down_proj', '0.mlp.experts.47.gate_proj', '0.mlp.experts.47.up_proj', '0.mlp.experts.47.down_proj', '0.mlp.experts.59.gate_proj', '0.mlp.experts.59.up_proj', '0.mlp.experts.59.down_proj', '0.mlp.experts.53.gate_proj', '0.mlp.experts.53.up_proj', '0.mlp.experts.53.down_proj', '1.mlp.experts.25.gate_proj', '1.mlp.experts.25.up_proj', '1.mlp.experts.25.down_proj', '1.mlp.experts.15.gate_proj', '1.mlp.experts.15.up_proj', '1.mlp.experts.15.down_proj', '1.mlp.experts.47.gate_proj', '1.mlp.experts.47.up_proj', '1.mlp.experts.47.down_proj', '1.mlp.experts.18.gate_proj', '1.mlp.experts.18.up_proj', '1.mlp.experts.18.down_proj', '1.mlp.experts.11.gate_proj', '1.mlp.experts.11.up_proj', '1.mlp.experts.11.down_proj', '1.mlp.experts.60.gate_proj', '1.mlp.experts.60.up_proj', '1.mlp.experts.60.down_proj', '1.mlp.experts.7.gate_proj', '1.mlp.experts.7.up_proj', '1.mlp.experts.7.down_proj', '1.mlp.experts.17.gate_proj', '1.mlp.experts.17.up_proj', '1.mlp.experts.17.down_proj', '2.mlp.experts.40.gate_proj', '2.mlp.experts.40.up_proj', '2.mlp.experts.40.down_proj', '2.mlp.experts.5.gate_proj', '2.mlp.experts.5.up_proj', '2.mlp.experts.5.down_proj', '2.mlp.experts.60.gate_proj', '2.mlp.experts.60.up_proj', '2.mlp.experts.60.down_proj', '2.mlp.experts.28.gate_proj', '2.mlp.experts.28.up_proj', '2.mlp.experts.28.down_proj', '2.mlp.experts.9.gate_proj', '2.mlp.experts.9.up_proj', '2.mlp.experts.9.down_proj', '2.mlp.experts.61.gate_proj', '2.mlp.experts.61.up_proj', '2.mlp.experts.61.down_proj', 
'2.mlp.experts.10.gate_proj', '2.mlp.experts.10.up_proj', '2.mlp.experts.10.down_proj', '2.mlp.experts.45.gate_proj', '2.mlp.experts.45.up_proj', '2.mlp.experts.45.down_proj', '3.mlp.experts.15.gate_proj', '3.mlp.experts.15.up_proj', '3.mlp.experts.15.down_proj', '3.mlp.experts.43.gate_proj', '3.mlp.experts.43.up_proj', '3.mlp.experts.43.down_proj', '3.mlp.experts.62.gate_proj', '3.mlp.experts.62.up_proj', '3.mlp.experts.62.down_proj', '3.mlp.experts.39.gate_proj', '3.mlp.experts.39.up_proj', '3.mlp.experts.39.down_proj', '3.mlp.experts.35.gate_proj', '3.mlp.experts.35.up_proj', '3.mlp.experts.35.down_proj', '3.mlp.experts.56.gate_proj', '3.mlp.experts.56.up_proj', '3.mlp.experts.56.down_proj', '3.mlp.experts.44.gate_proj', '3.mlp.experts.44.up_proj', '3.mlp.experts.44.down_proj', '3.mlp.experts.16.gate_proj', '3.mlp.experts.16.up_proj', '3.mlp.experts.16.down_proj', '4.mlp.experts.21.gate_proj', '4.mlp.experts.21.up_proj', '4.mlp.experts.21.down_proj', '4.mlp.experts.27.gate_proj', '4.mlp.experts.27.up_proj', '4.mlp.experts.27.down_proj', '4.mlp.experts.25.gate_proj', '4.mlp.experts.25.up_proj', '4.mlp.experts.25.down_proj', '4.mlp.experts.17.gate_proj', '4.mlp.experts.17.up_proj', '4.mlp.experts.17.down_proj', '4.mlp.experts.33.gate_proj', '4.mlp.experts.33.up_proj', '4.mlp.experts.33.down_proj', '4.mlp.experts.55.gate_proj', '4.mlp.experts.55.up_proj', '4.mlp.experts.55.down_proj', '4.mlp.experts.19.gate_proj', '4.mlp.experts.19.up_proj', '4.mlp.experts.19.down_proj', '4.mlp.experts.6.gate_proj', '4.mlp.experts.6.up_proj', '4.mlp.experts.6.down_proj', '5.mlp.experts.42.gate_proj', '5.mlp.experts.42.up_proj', '5.mlp.experts.42.down_proj', '5.mlp.experts.2.gate_proj', '5.mlp.experts.2.up_proj', '5.mlp.experts.2.down_proj', '5.mlp.experts.31.gate_proj', '5.mlp.experts.31.up_proj', '5.mlp.experts.31.down_proj', '5.mlp.experts.17.gate_proj', '5.mlp.experts.17.up_proj', '5.mlp.experts.17.down_proj', '5.mlp.experts.0.gate_proj', '5.mlp.experts.0.up_proj', 
'5.mlp.experts.0.down_proj', '5.mlp.experts.55.gate_proj', '5.mlp.experts.55.up_proj', '5.mlp.experts.55.down_proj', '5.mlp.experts.21.gate_proj', '5.mlp.experts.21.up_proj', '5.mlp.experts.21.down_proj', '5.mlp.experts.60.gate_proj', '5.mlp.experts.60.up_proj', '5.mlp.experts.60.down_proj', '6.mlp.experts.52.gate_proj', '6.mlp.experts.52.up_proj', '6.mlp.experts.52.down_proj', '6.mlp.experts.49.gate_proj', '6.mlp.experts.49.up_proj', '6.mlp.experts.49.down_proj', '6.mlp.experts.42.gate_proj', '6.mlp.experts.42.up_proj', '6.mlp.experts.42.down_proj', '6.mlp.experts.59.gate_proj', '6.mlp.experts.59.up_proj', '6.mlp.experts.59.down_proj', '6.mlp.experts.17.gate_proj', '6.mlp.experts.17.up_proj', '6.mlp.experts.17.down_proj', '6.mlp.experts.40.gate_proj', '6.mlp.experts.40.up_proj', '6.mlp.experts.40.down_proj', '6.mlp.experts.36.gate_proj', '6.mlp.experts.36.up_proj', '6.mlp.experts.36.down_proj', '6.mlp.experts.22.gate_proj', '6.mlp.experts.22.up_proj', '6.mlp.experts.22.down_proj', '7.mlp.experts.17.gate_proj', '7.mlp.experts.17.up_proj', '7.mlp.experts.17.down_proj', '7.mlp.experts.35.gate_proj', '7.mlp.experts.35.up_proj', '7.mlp.experts.35.down_proj', '7.mlp.experts.2.gate_proj', '7.mlp.experts.2.up_proj', '7.mlp.experts.2.down_proj', '7.mlp.experts.21.gate_proj', '7.mlp.experts.21.up_proj', '7.mlp.experts.21.down_proj', '7.mlp.experts.45.gate_proj', '7.mlp.experts.45.up_proj', '7.mlp.experts.45.down_proj', '7.mlp.experts.4.gate_proj', '7.mlp.experts.4.up_proj', '7.mlp.experts.4.down_proj', '7.mlp.experts.39.gate_proj', '7.mlp.experts.39.up_proj', '7.mlp.experts.39.down_proj', '7.mlp.experts.28.gate_proj', '7.mlp.experts.28.up_proj', '7.mlp.experts.28.down_proj', '8.mlp.experts.54.gate_proj', '8.mlp.experts.54.up_proj', '8.mlp.experts.54.down_proj', '8.mlp.experts.3.gate_proj', '8.mlp.experts.3.up_proj', '8.mlp.experts.3.down_proj', '8.mlp.experts.32.gate_proj', '8.mlp.experts.32.up_proj', '8.mlp.experts.32.down_proj', '8.mlp.experts.6.gate_proj', 
'8.mlp.experts.6.up_proj', '8.mlp.experts.6.down_proj', '8.mlp.experts.44.gate_proj', '8.mlp.experts.44.up_proj', '8.mlp.experts.44.down_proj', '8.mlp.experts.16.gate_proj', '8.mlp.experts.16.up_proj', '8.mlp.experts.16.down_proj', '8.mlp.experts.61.gate_proj', '8.mlp.experts.61.up_proj', '8.mlp.experts.61.down_proj', '8.mlp.experts.48.gate_proj', '8.mlp.experts.48.up_proj', '8.mlp.experts.48.down_proj', '9.mlp.experts.46.gate_proj', '9.mlp.experts.46.up_proj', '9.mlp.experts.46.down_proj', '9.mlp.experts.51.gate_proj', '9.mlp.experts.51.up_proj', '9.mlp.experts.51.down_proj', '9.mlp.experts.5.gate_proj', '9.mlp.experts.5.up_proj', '9.mlp.experts.5.down_proj', '9.mlp.experts.11.gate_proj', '9.mlp.experts.11.up_proj', '9.mlp.experts.11.down_proj', '9.mlp.experts.39.gate_proj', '9.mlp.experts.39.up_proj', '9.mlp.experts.39.down_proj', '9.mlp.experts.4.gate_proj', '9.mlp.experts.4.up_proj', '9.mlp.experts.4.down_proj', '9.mlp.experts.47.gate_proj', '9.mlp.experts.47.up_proj', '9.mlp.experts.47.down_proj', '9.mlp.experts.33.gate_proj', '9.mlp.experts.33.up_proj', '9.mlp.experts.33.down_proj', '10.mlp.experts.56.gate_proj', '10.mlp.experts.56.up_proj', '10.mlp.experts.56.down_proj', '10.mlp.experts.43.gate_proj', '10.mlp.experts.43.up_proj', '10.mlp.experts.43.down_proj', '10.mlp.experts.29.gate_proj', '10.mlp.experts.29.up_proj', '10.mlp.experts.29.down_proj', '10.mlp.experts.19.gate_proj', '10.mlp.experts.19.up_proj', '10.mlp.experts.19.down_proj', '10.mlp.experts.46.gate_proj', '10.mlp.experts.46.up_proj', '10.mlp.experts.46.down_proj', '10.mlp.experts.41.gate_proj', '10.mlp.experts.41.up_proj', '10.mlp.experts.41.down_proj', '10.mlp.experts.48.gate_proj', '10.mlp.experts.48.up_proj', '10.mlp.experts.48.down_proj', '10.mlp.experts.23.gate_proj', '10.mlp.experts.23.up_proj', '10.mlp.experts.23.down_proj', '11.mlp.experts.27.gate_proj', '11.mlp.experts.27.up_proj', '11.mlp.experts.27.down_proj', '11.mlp.experts.13.gate_proj', '11.mlp.experts.13.up_proj', 
'11.mlp.experts.13.down_proj', '11.mlp.experts.47.gate_proj', '11.mlp.experts.47.up_proj', '11.mlp.experts.47.down_proj', '11.mlp.experts.20.gate_proj', '11.mlp.experts.20.up_proj', '11.mlp.experts.20.down_proj', '11.mlp.experts.54.gate_proj', '11.mlp.experts.54.up_proj', '11.mlp.experts.54.down_proj', '11.mlp.experts.23.gate_proj', '11.mlp.experts.23.up_proj', '11.mlp.experts.23.down_proj', '11.mlp.experts.62.gate_proj', '11.mlp.experts.62.up_proj', '11.mlp.experts.62.down_proj', '11.mlp.experts.15.gate_proj', '11.mlp.experts.15.up_proj', '11.mlp.experts.15.down_proj', '12.mlp.experts.38.gate_proj', '12.mlp.experts.38.up_proj', '12.mlp.experts.38.down_proj', '12.mlp.experts.60.gate_proj', '12.mlp.experts.60.up_proj', '12.mlp.experts.60.down_proj', '12.mlp.experts.8.gate_proj', '12.mlp.experts.8.up_proj', '12.mlp.experts.8.down_proj', '12.mlp.experts.31.gate_proj', '12.mlp.experts.31.up_proj', '12.mlp.experts.31.down_proj', '12.mlp.experts.10.gate_proj', '12.mlp.experts.10.up_proj', '12.mlp.experts.10.down_proj', '12.mlp.experts.43.gate_proj', '12.mlp.experts.43.up_proj', '12.mlp.experts.43.down_proj', '12.mlp.experts.53.gate_proj', '12.mlp.experts.53.up_proj', '12.mlp.experts.53.down_proj', '12.mlp.experts.47.gate_proj', '12.mlp.experts.47.up_proj', '12.mlp.experts.47.down_proj', '13.mlp.experts.2.gate_proj', '13.mlp.experts.2.up_proj', '13.mlp.experts.2.down_proj', '13.mlp.experts.62.gate_proj', '13.mlp.experts.62.up_proj', '13.mlp.experts.62.down_proj', '13.mlp.experts.25.gate_proj', '13.mlp.experts.25.up_proj', '13.mlp.experts.25.down_proj', '13.mlp.experts.6.gate_proj', '13.mlp.experts.6.up_proj', '13.mlp.experts.6.down_proj', '13.mlp.experts.51.gate_proj', '13.mlp.experts.51.up_proj', '13.mlp.experts.51.down_proj', '13.mlp.experts.5.gate_proj', '13.mlp.experts.5.up_proj', '13.mlp.experts.5.down_proj', '13.mlp.experts.28.gate_proj', '13.mlp.experts.28.up_proj', '13.mlp.experts.28.down_proj', '13.mlp.experts.21.gate_proj', '13.mlp.experts.21.up_proj', 
'13.mlp.experts.21.down_proj', '14.mlp.experts.58.gate_proj', '14.mlp.experts.58.up_proj', '14.mlp.experts.58.down_proj', '14.mlp.experts.54.gate_proj', '14.mlp.experts.54.up_proj', '14.mlp.experts.54.down_proj', '14.mlp.experts.6.gate_proj', '14.mlp.experts.6.up_proj', '14.mlp.experts.6.down_proj', '14.mlp.experts.7.gate_proj', '14.mlp.experts.7.up_proj', '14.mlp.experts.7.down_proj', '14.mlp.experts.9.gate_proj', '14.mlp.experts.9.up_proj', '14.mlp.experts.9.down_proj', '14.mlp.experts.36.gate_proj', '14.mlp.experts.36.up_proj', '14.mlp.experts.36.down_proj', '14.mlp.experts.57.gate_proj', '14.mlp.experts.57.up_proj', '14.mlp.experts.57.down_proj', '14.mlp.experts.17.gate_proj', '14.mlp.experts.17.up_proj', '14.mlp.experts.17.down_proj', '15.mlp.experts.8.gate_proj', '15.mlp.experts.8.up_proj', '15.mlp.experts.8.down_proj', '15.mlp.experts.30.gate_proj', '15.mlp.experts.30.up_proj', '15.mlp.experts.30.down_proj', '15.mlp.experts.17.gate_proj', '15.mlp.experts.17.up_proj', '15.mlp.experts.17.down_proj', '15.mlp.experts.44.gate_proj', '15.mlp.experts.44.up_proj', '15.mlp.experts.44.down_proj', '15.mlp.experts.36.gate_proj', '15.mlp.experts.36.up_proj', '15.mlp.experts.36.down_proj', '15.mlp.experts.57.gate_proj', '15.mlp.experts.57.up_proj', '15.mlp.experts.57.down_proj', '15.mlp.experts.24.gate_proj', '15.mlp.experts.24.up_proj', '15.mlp.experts.24.down_proj', '15.mlp.experts.12.gate_proj', '15.mlp.experts.12.up_proj', '15.mlp.experts.12.down_proj']
                #target_modules for safe
                # Build the module-name list from a compact table instead of spelling
                # out every string. Each inner tuple holds the eight expert ids chosen
                # for one leading index (0-15, presumably the layer index -- the tuple's
                # position is that index). Every expert expands, in order, to its
                # gate_proj / up_proj / down_proj entries, giving
                # '<index>.mlp.experts.<expert>.<proj>' names (16 * 8 * 3 = 384 total).
                # The order of ids inside each tuple is kept exactly as originally listed.
                target_modules = [
                    f'{layer_idx}.mlp.experts.{expert_id}.{proj_name}'
                    for layer_idx, expert_ids in enumerate((
                        (6, 21, 61, 24, 59, 60, 57, 63),    # 0
                        (47, 60, 11, 29, 41, 7, 15, 51),    # 1
                        (28, 9, 4, 46, 8, 5, 51, 55),       # 2
                        (15, 62, 56, 35, 51, 43, 4, 33),    # 3
                        (27, 2, 33, 25, 51, 63, 48, 29),    # 4
                        (55, 31, 13, 17, 26, 2, 60, 21),    # 5
                        (40, 59, 49, 23, 17, 36, 22, 38),   # 6
                        (35, 13, 21, 45, 42, 22, 2, 33),    # 7
                        (32, 6, 44, 45, 48, 61, 37, 26),    # 8
                        (39, 11, 47, 6, 7, 12, 33, 34),     # 9
                        (48, 29, 59, 43, 41, 44, 3, 19),    # 10
                        (14, 62, 20, 23, 51, 47, 15, 27),   # 11
                        (60, 23, 53, 8, 29, 10, 47, 59),    # 12
                        (62, 5, 31, 51, 25, 2, 4, 10),      # 13
                        (58, 22, 57, 9, 54, 7, 36, 6),      # 14
                        (24, 8, 30, 58, 22, 57, 36, 12),    # 15
                    ))
                    for expert_id in expert_ids
                    for proj_name in ('gate_proj', 'up_proj', 'down_proj')
                ]
                #used for lori-d
                #target_modules = ['gate','0.mlp.experts.6.gate_proj', '0.mlp.experts.6.up_proj', '0.mlp.experts.6.down_proj', '0.mlp.experts.21.gate_proj', '0.mlp.experts.21.up_proj', '0.mlp.experts.21.down_proj', '0.mlp.experts.61.gate_proj', '0.mlp.experts.61.up_proj', '0.mlp.experts.61.down_proj', '0.mlp.experts.24.gate_proj', '0.mlp.experts.24.up_proj', '0.mlp.experts.24.down_proj', '0.mlp.experts.59.gate_proj', '0.mlp.experts.59.up_proj', '0.mlp.experts.59.down_proj', '0.mlp.experts.60.gate_proj', '0.mlp.experts.60.up_proj', '0.mlp.experts.60.down_proj', '0.mlp.experts.57.gate_proj', '0.mlp.experts.57.up_proj', '0.mlp.experts.57.down_proj', '0.mlp.experts.63.gate_proj', '0.mlp.experts.63.up_proj', '0.mlp.experts.63.down_proj', '1.mlp.experts.47.gate_proj', '1.mlp.experts.47.up_proj', '1.mlp.experts.47.down_proj', '1.mlp.experts.60.gate_proj', '1.mlp.experts.60.up_proj', '1.mlp.experts.60.down_proj', '1.mlp.experts.11.gate_proj', '1.mlp.experts.11.up_proj', '1.mlp.experts.11.down_proj', '1.mlp.experts.15.gate_proj', '1.mlp.experts.15.up_proj', '1.mlp.experts.15.down_proj', '1.mlp.experts.25.gate_proj', '1.mlp.experts.25.up_proj', '1.mlp.experts.25.down_proj', '1.mlp.experts.41.gate_proj', '1.mlp.experts.41.up_proj', '1.mlp.experts.41.down_proj', '1.mlp.experts.7.gate_proj', '1.mlp.experts.7.up_proj', '1.mlp.experts.7.down_proj', '1.mlp.experts.29.gate_proj', '1.mlp.experts.29.up_proj', '1.mlp.experts.29.down_proj', '2.mlp.experts.9.gate_proj', '2.mlp.experts.9.up_proj', '2.mlp.experts.9.down_proj', '2.mlp.experts.28.gate_proj', '2.mlp.experts.28.up_proj', '2.mlp.experts.28.down_proj', '2.mlp.experts.4.gate_proj', '2.mlp.experts.4.up_proj', '2.mlp.experts.4.down_proj', '2.mlp.experts.5.gate_proj', '2.mlp.experts.5.up_proj', '2.mlp.experts.5.down_proj', '2.mlp.experts.46.gate_proj', '2.mlp.experts.46.up_proj', '2.mlp.experts.46.down_proj', '2.mlp.experts.40.gate_proj', '2.mlp.experts.40.up_proj', '2.mlp.experts.40.down_proj', '2.mlp.experts.51.gate_proj', 
'2.mlp.experts.51.up_proj', '2.mlp.experts.51.down_proj', '2.mlp.experts.8.gate_proj', '2.mlp.experts.8.up_proj', '2.mlp.experts.8.down_proj', '3.mlp.experts.15.gate_proj', '3.mlp.experts.15.up_proj', '3.mlp.experts.15.down_proj', '3.mlp.experts.62.gate_proj', '3.mlp.experts.62.up_proj', '3.mlp.experts.62.down_proj', '3.mlp.experts.56.gate_proj', '3.mlp.experts.56.up_proj', '3.mlp.experts.56.down_proj', '3.mlp.experts.43.gate_proj', '3.mlp.experts.43.up_proj', '3.mlp.experts.43.down_proj', '3.mlp.experts.35.gate_proj', '3.mlp.experts.35.up_proj', '3.mlp.experts.35.down_proj', '3.mlp.experts.51.gate_proj', '3.mlp.experts.51.up_proj', '3.mlp.experts.51.down_proj', '3.mlp.experts.4.gate_proj', '3.mlp.experts.4.up_proj', '3.mlp.experts.4.down_proj', '3.mlp.experts.33.gate_proj', '3.mlp.experts.33.up_proj', '3.mlp.experts.33.down_proj', '4.mlp.experts.27.gate_proj', '4.mlp.experts.27.up_proj', '4.mlp.experts.27.down_proj', '4.mlp.experts.2.gate_proj', '4.mlp.experts.2.up_proj', '4.mlp.experts.2.down_proj', '4.mlp.experts.33.gate_proj', '4.mlp.experts.33.up_proj', '4.mlp.experts.33.down_proj', '4.mlp.experts.51.gate_proj', '4.mlp.experts.51.up_proj', '4.mlp.experts.51.down_proj', '4.mlp.experts.63.gate_proj', '4.mlp.experts.63.up_proj', '4.mlp.experts.63.down_proj', '4.mlp.experts.25.gate_proj', '4.mlp.experts.25.up_proj', '4.mlp.experts.25.down_proj', '4.mlp.experts.48.gate_proj', '4.mlp.experts.48.up_proj', '4.mlp.experts.48.down_proj', '4.mlp.experts.41.gate_proj', '4.mlp.experts.41.up_proj', '4.mlp.experts.41.down_proj', '5.mlp.experts.13.gate_proj', '5.mlp.experts.13.up_proj', '5.mlp.experts.13.down_proj', '5.mlp.experts.55.gate_proj', '5.mlp.experts.55.up_proj', '5.mlp.experts.55.down_proj', '5.mlp.experts.31.gate_proj', '5.mlp.experts.31.up_proj', '5.mlp.experts.31.down_proj', '5.mlp.experts.17.gate_proj', '5.mlp.experts.17.up_proj', '5.mlp.experts.17.down_proj', '5.mlp.experts.26.gate_proj', '5.mlp.experts.26.up_proj', '5.mlp.experts.26.down_proj', 
'5.mlp.experts.60.gate_proj', '5.mlp.experts.60.up_proj', '5.mlp.experts.60.down_proj', '5.mlp.experts.2.gate_proj', '5.mlp.experts.2.up_proj', '5.mlp.experts.2.down_proj', '5.mlp.experts.21.gate_proj', '5.mlp.experts.21.up_proj', '5.mlp.experts.21.down_proj', '6.mlp.experts.40.gate_proj', '6.mlp.experts.40.up_proj', '6.mlp.experts.40.down_proj', '6.mlp.experts.59.gate_proj', '6.mlp.experts.59.up_proj', '6.mlp.experts.59.down_proj', '6.mlp.experts.49.gate_proj', '6.mlp.experts.49.up_proj', '6.mlp.experts.49.down_proj', '6.mlp.experts.17.gate_proj', '6.mlp.experts.17.up_proj', '6.mlp.experts.17.down_proj', '6.mlp.experts.23.gate_proj', '6.mlp.experts.23.up_proj', '6.mlp.experts.23.down_proj', '6.mlp.experts.36.gate_proj', '6.mlp.experts.36.up_proj', '6.mlp.experts.36.down_proj', '6.mlp.experts.22.gate_proj', '6.mlp.experts.22.up_proj', '6.mlp.experts.22.down_proj', '6.mlp.experts.38.gate_proj', '6.mlp.experts.38.up_proj', '6.mlp.experts.38.down_proj', '7.mlp.experts.35.gate_proj', '7.mlp.experts.35.up_proj', '7.mlp.experts.35.down_proj', '7.mlp.experts.13.gate_proj', '7.mlp.experts.13.up_proj', '7.mlp.experts.13.down_proj', '7.mlp.experts.21.gate_proj', '7.mlp.experts.21.up_proj', '7.mlp.experts.21.down_proj', '7.mlp.experts.45.gate_proj', '7.mlp.experts.45.up_proj', '7.mlp.experts.45.down_proj', '7.mlp.experts.42.gate_proj', '7.mlp.experts.42.up_proj', '7.mlp.experts.42.down_proj', '7.mlp.experts.22.gate_proj', '7.mlp.experts.22.up_proj', '7.mlp.experts.22.down_proj', '7.mlp.experts.2.gate_proj', '7.mlp.experts.2.up_proj', '7.mlp.experts.2.down_proj', '7.mlp.experts.33.gate_proj', '7.mlp.experts.33.up_proj', '7.mlp.experts.33.down_proj', '8.mlp.experts.32.gate_proj', '8.mlp.experts.32.up_proj', '8.mlp.experts.32.down_proj', '8.mlp.experts.6.gate_proj', '8.mlp.experts.6.up_proj', '8.mlp.experts.6.down_proj', '8.mlp.experts.44.gate_proj', '8.mlp.experts.44.up_proj', '8.mlp.experts.44.down_proj', '8.mlp.experts.45.gate_proj', '8.mlp.experts.45.up_proj', 
'8.mlp.experts.45.down_proj', '8.mlp.experts.48.gate_proj', '8.mlp.experts.48.up_proj', '8.mlp.experts.48.down_proj', '8.mlp.experts.37.gate_proj', '8.mlp.experts.37.up_proj', '8.mlp.experts.37.down_proj', '8.mlp.experts.61.gate_proj', '8.mlp.experts.61.up_proj', '8.mlp.experts.61.down_proj', '8.mlp.experts.62.gate_proj', '8.mlp.experts.62.up_proj', '8.mlp.experts.62.down_proj', '9.mlp.experts.39.gate_proj', '9.mlp.experts.39.up_proj', '9.mlp.experts.39.down_proj', '9.mlp.experts.11.gate_proj', '9.mlp.experts.11.up_proj', '9.mlp.experts.11.down_proj', '9.mlp.experts.47.gate_proj', '9.mlp.experts.47.up_proj', '9.mlp.experts.47.down_proj', '9.mlp.experts.7.gate_proj', '9.mlp.experts.7.up_proj', '9.mlp.experts.7.down_proj', '9.mlp.experts.6.gate_proj', '9.mlp.experts.6.up_proj', '9.mlp.experts.6.down_proj', '9.mlp.experts.33.gate_proj', '9.mlp.experts.33.up_proj', '9.mlp.experts.33.down_proj', '9.mlp.experts.12.gate_proj', '9.mlp.experts.12.up_proj', '9.mlp.experts.12.down_proj', '9.mlp.experts.31.gate_proj', '9.mlp.experts.31.up_proj', '9.mlp.experts.31.down_proj', '10.mlp.experts.48.gate_proj', '10.mlp.experts.48.up_proj', '10.mlp.experts.48.down_proj', '10.mlp.experts.29.gate_proj', '10.mlp.experts.29.up_proj', '10.mlp.experts.29.down_proj', '10.mlp.experts.59.gate_proj', '10.mlp.experts.59.up_proj', '10.mlp.experts.59.down_proj', '10.mlp.experts.43.gate_proj', '10.mlp.experts.43.up_proj', '10.mlp.experts.43.down_proj', '10.mlp.experts.3.gate_proj', '10.mlp.experts.3.up_proj', '10.mlp.experts.3.down_proj', '10.mlp.experts.41.gate_proj', '10.mlp.experts.41.up_proj', '10.mlp.experts.41.down_proj', '10.mlp.experts.44.gate_proj', '10.mlp.experts.44.up_proj', '10.mlp.experts.44.down_proj', '10.mlp.experts.19.gate_proj', '10.mlp.experts.19.up_proj', '10.mlp.experts.19.down_proj', '11.mlp.experts.14.gate_proj', '11.mlp.experts.14.up_proj', '11.mlp.experts.14.down_proj', '11.mlp.experts.62.gate_proj', '11.mlp.experts.62.up_proj', '11.mlp.experts.62.down_proj', 
'11.mlp.experts.20.gate_proj', '11.mlp.experts.20.up_proj', '11.mlp.experts.20.down_proj', '11.mlp.experts.15.gate_proj', '11.mlp.experts.15.up_proj', '11.mlp.experts.15.down_proj', '11.mlp.experts.23.gate_proj', '11.mlp.experts.23.up_proj', '11.mlp.experts.23.down_proj', '11.mlp.experts.47.gate_proj', '11.mlp.experts.47.up_proj', '11.mlp.experts.47.down_proj', '11.mlp.experts.51.gate_proj', '11.mlp.experts.51.up_proj', '11.mlp.experts.51.down_proj', '11.mlp.experts.35.gate_proj', '11.mlp.experts.35.up_proj', '11.mlp.experts.35.down_proj', '12.mlp.experts.60.gate_proj', '12.mlp.experts.60.up_proj', '12.mlp.experts.60.down_proj', '12.mlp.experts.23.gate_proj', '12.mlp.experts.23.up_proj', '12.mlp.experts.23.down_proj', '12.mlp.experts.10.gate_proj', '12.mlp.experts.10.up_proj', '12.mlp.experts.10.down_proj', '12.mlp.experts.53.gate_proj', '12.mlp.experts.53.up_proj', '12.mlp.experts.53.down_proj', '12.mlp.experts.29.gate_proj', '12.mlp.experts.29.up_proj', '12.mlp.experts.29.down_proj', '12.mlp.experts.8.gate_proj', '12.mlp.experts.8.up_proj', '12.mlp.experts.8.down_proj', '12.mlp.experts.47.gate_proj', '12.mlp.experts.47.up_proj', '12.mlp.experts.47.down_proj', '12.mlp.experts.59.gate_proj', '12.mlp.experts.59.up_proj', '12.mlp.experts.59.down_proj', '13.mlp.experts.5.gate_proj', '13.mlp.experts.5.up_proj', '13.mlp.experts.5.down_proj', '13.mlp.experts.62.gate_proj', '13.mlp.experts.62.up_proj', '13.mlp.experts.62.down_proj', '13.mlp.experts.51.gate_proj', '13.mlp.experts.51.up_proj', '13.mlp.experts.51.down_proj', '13.mlp.experts.31.gate_proj', '13.mlp.experts.31.up_proj', '13.mlp.experts.31.down_proj', '13.mlp.experts.10.gate_proj', '13.mlp.experts.10.up_proj', '13.mlp.experts.10.down_proj', '13.mlp.experts.25.gate_proj', '13.mlp.experts.25.up_proj', '13.mlp.experts.25.down_proj', '13.mlp.experts.22.gate_proj', '13.mlp.experts.22.up_proj', '13.mlp.experts.22.down_proj', '13.mlp.experts.2.gate_proj', '13.mlp.experts.2.up_proj', '13.mlp.experts.2.down_proj', 
'14.mlp.experts.58.gate_proj', '14.mlp.experts.58.up_proj', '14.mlp.experts.58.down_proj', '14.mlp.experts.54.gate_proj', '14.mlp.experts.54.up_proj', '14.mlp.experts.54.down_proj', '14.mlp.experts.22.gate_proj', '14.mlp.experts.22.up_proj', '14.mlp.experts.22.down_proj', '14.mlp.experts.9.gate_proj', '14.mlp.experts.9.up_proj', '14.mlp.experts.9.down_proj', '14.mlp.experts.57.gate_proj', '14.mlp.experts.57.up_proj', '14.mlp.experts.57.down_proj', '14.mlp.experts.7.gate_proj', '14.mlp.experts.7.up_proj', '14.mlp.experts.7.down_proj', '14.mlp.experts.46.gate_proj', '14.mlp.experts.46.up_proj', '14.mlp.experts.46.down_proj', '14.mlp.experts.29.gate_proj', '14.mlp.experts.29.up_proj', '14.mlp.experts.29.down_proj', '15.mlp.experts.24.gate_proj', '15.mlp.experts.24.up_proj', '15.mlp.experts.24.down_proj', '15.mlp.experts.8.gate_proj', '15.mlp.experts.8.up_proj', '15.mlp.experts.8.down_proj', '15.mlp.experts.22.gate_proj', '15.mlp.experts.22.up_proj', '15.mlp.experts.22.down_proj', '15.mlp.experts.9.gate_proj', '15.mlp.experts.9.up_proj', '15.mlp.experts.9.down_proj', '15.mlp.experts.58.gate_proj', '15.mlp.experts.58.up_proj', '15.mlp.experts.58.down_proj', '15.mlp.experts.30.gate_proj', '15.mlp.experts.30.up_proj', '15.mlp.experts.30.down_proj', '15.mlp.experts.57.gate_proj', '15.mlp.experts.57.up_proj', '15.mlp.experts.57.down_proj', '15.mlp.experts.36.gate_proj', '15.mlp.experts.36.up_proj', '15.mlp.experts.36.down_proj']
                # use for lori-s
                #target_modules = ['gate', '0.mlp.experts.6.gate_proj', '0.mlp.experts.6.up_proj', '0.mlp.experts.6.down_proj', '0.mlp.experts.21.gate_proj', '0.mlp.experts.21.up_proj', '0.mlp.experts.21.down_proj', '0.mlp.experts.61.gate_proj', '0.mlp.experts.61.up_proj', '0.mlp.experts.61.down_proj', '0.mlp.experts.24.gate_proj', '0.mlp.experts.24.up_proj', '0.mlp.experts.24.down_proj', '0.mlp.experts.63.gate_proj', '0.mlp.experts.63.up_proj', '0.mlp.experts.63.down_proj', '0.mlp.experts.59.gate_proj', '0.mlp.experts.59.up_proj', '0.mlp.experts.59.down_proj', '0.mlp.experts.57.gate_proj', '0.mlp.experts.57.up_proj', '0.mlp.experts.57.down_proj', '0.mlp.experts.60.gate_proj', '0.mlp.experts.60.up_proj', '0.mlp.experts.60.down_proj', '1.mlp.experts.47.gate_proj', '1.mlp.experts.47.up_proj', '1.mlp.experts.47.down_proj', '1.mlp.experts.60.gate_proj', '1.mlp.experts.60.up_proj', '1.mlp.experts.60.down_proj', '1.mlp.experts.11.gate_proj', '1.mlp.experts.11.up_proj', '1.mlp.experts.11.down_proj', '1.mlp.experts.7.gate_proj', '1.mlp.experts.7.up_proj', '1.mlp.experts.7.down_proj', '1.mlp.experts.41.gate_proj', '1.mlp.experts.41.up_proj', '1.mlp.experts.41.down_proj', '1.mlp.experts.15.gate_proj', '1.mlp.experts.15.up_proj', '1.mlp.experts.15.down_proj', '1.mlp.experts.29.gate_proj', '1.mlp.experts.29.up_proj', '1.mlp.experts.29.down_proj', '1.mlp.experts.51.gate_proj', '1.mlp.experts.51.up_proj', '1.mlp.experts.51.down_proj', '2.mlp.experts.9.gate_proj', '2.mlp.experts.9.up_proj', '2.mlp.experts.9.down_proj', '2.mlp.experts.28.gate_proj', '2.mlp.experts.28.up_proj', '2.mlp.experts.28.down_proj', '2.mlp.experts.4.gate_proj', '2.mlp.experts.4.up_proj', '2.mlp.experts.4.down_proj', '2.mlp.experts.46.gate_proj', '2.mlp.experts.46.up_proj', '2.mlp.experts.46.down_proj', '2.mlp.experts.5.gate_proj', '2.mlp.experts.5.up_proj', '2.mlp.experts.5.down_proj', '2.mlp.experts.8.gate_proj', '2.mlp.experts.8.up_proj', '2.mlp.experts.8.down_proj', '2.mlp.experts.51.gate_proj', 
'2.mlp.experts.51.up_proj', '2.mlp.experts.51.down_proj', '2.mlp.experts.40.gate_proj', '2.mlp.experts.40.up_proj', '2.mlp.experts.40.down_proj', '3.mlp.experts.15.gate_proj', '3.mlp.experts.15.up_proj', '3.mlp.experts.15.down_proj', '3.mlp.experts.62.gate_proj', '3.mlp.experts.62.up_proj', '3.mlp.experts.62.down_proj', '3.mlp.experts.56.gate_proj', '3.mlp.experts.56.up_proj', '3.mlp.experts.56.down_proj', '3.mlp.experts.43.gate_proj', '3.mlp.experts.43.up_proj', '3.mlp.experts.43.down_proj', '3.mlp.experts.35.gate_proj', '3.mlp.experts.35.up_proj', '3.mlp.experts.35.down_proj', '3.mlp.experts.51.gate_proj', '3.mlp.experts.51.up_proj', '3.mlp.experts.51.down_proj', '3.mlp.experts.4.gate_proj', '3.mlp.experts.4.up_proj', '3.mlp.experts.4.down_proj', '3.mlp.experts.39.gate_proj', '3.mlp.experts.39.up_proj', '3.mlp.experts.39.down_proj', '4.mlp.experts.27.gate_proj', '4.mlp.experts.27.up_proj', '4.mlp.experts.27.down_proj', '4.mlp.experts.2.gate_proj', '4.mlp.experts.2.up_proj', '4.mlp.experts.2.down_proj', '4.mlp.experts.33.gate_proj', '4.mlp.experts.33.up_proj', '4.mlp.experts.33.down_proj', '4.mlp.experts.41.gate_proj', '4.mlp.experts.41.up_proj', '4.mlp.experts.41.down_proj', '4.mlp.experts.25.gate_proj', '4.mlp.experts.25.up_proj', '4.mlp.experts.25.down_proj', '4.mlp.experts.51.gate_proj', '4.mlp.experts.51.up_proj', '4.mlp.experts.51.down_proj', '4.mlp.experts.48.gate_proj', '4.mlp.experts.48.up_proj', '4.mlp.experts.48.down_proj', '4.mlp.experts.29.gate_proj', '4.mlp.experts.29.up_proj', '4.mlp.experts.29.down_proj', '5.mlp.experts.13.gate_proj', '5.mlp.experts.13.up_proj', '5.mlp.experts.13.down_proj', '5.mlp.experts.31.gate_proj', '5.mlp.experts.31.up_proj', '5.mlp.experts.31.down_proj', '5.mlp.experts.55.gate_proj', '5.mlp.experts.55.up_proj', '5.mlp.experts.55.down_proj', '5.mlp.experts.17.gate_proj', '5.mlp.experts.17.up_proj', '5.mlp.experts.17.down_proj', '5.mlp.experts.26.gate_proj', '5.mlp.experts.26.up_proj', '5.mlp.experts.26.down_proj', 
'5.mlp.experts.60.gate_proj', '5.mlp.experts.60.up_proj', '5.mlp.experts.60.down_proj', '5.mlp.experts.21.gate_proj', '5.mlp.experts.21.up_proj', '5.mlp.experts.21.down_proj', '5.mlp.experts.51.gate_proj', '5.mlp.experts.51.up_proj', '5.mlp.experts.51.down_proj', '6.mlp.experts.40.gate_proj', '6.mlp.experts.40.up_proj', '6.mlp.experts.40.down_proj', '6.mlp.experts.59.gate_proj', '6.mlp.experts.59.up_proj', '6.mlp.experts.59.down_proj', '6.mlp.experts.49.gate_proj', '6.mlp.experts.49.up_proj', '6.mlp.experts.49.down_proj', '6.mlp.experts.17.gate_proj', '6.mlp.experts.17.up_proj', '6.mlp.experts.17.down_proj', '6.mlp.experts.23.gate_proj', '6.mlp.experts.23.up_proj', '6.mlp.experts.23.down_proj', '6.mlp.experts.22.gate_proj', '6.mlp.experts.22.up_proj', '6.mlp.experts.22.down_proj', '6.mlp.experts.36.gate_proj', '6.mlp.experts.36.up_proj', '6.mlp.experts.36.down_proj', '6.mlp.experts.38.gate_proj', '6.mlp.experts.38.up_proj', '6.mlp.experts.38.down_proj', '7.mlp.experts.13.gate_proj', '7.mlp.experts.13.up_proj', '7.mlp.experts.13.down_proj', '7.mlp.experts.35.gate_proj', '7.mlp.experts.35.up_proj', '7.mlp.experts.35.down_proj', '7.mlp.experts.21.gate_proj', '7.mlp.experts.21.up_proj', '7.mlp.experts.21.down_proj', '7.mlp.experts.45.gate_proj', '7.mlp.experts.45.up_proj', '7.mlp.experts.45.down_proj', '7.mlp.experts.42.gate_proj', '7.mlp.experts.42.up_proj', '7.mlp.experts.42.down_proj', '7.mlp.experts.22.gate_proj', '7.mlp.experts.22.up_proj', '7.mlp.experts.22.down_proj', '7.mlp.experts.49.gate_proj', '7.mlp.experts.49.up_proj', '7.mlp.experts.49.down_proj', '7.mlp.experts.2.gate_proj', '7.mlp.experts.2.up_proj', '7.mlp.experts.2.down_proj', '8.mlp.experts.32.gate_proj', '8.mlp.experts.32.up_proj', '8.mlp.experts.32.down_proj', '8.mlp.experts.6.gate_proj', '8.mlp.experts.6.up_proj', '8.mlp.experts.6.down_proj', '8.mlp.experts.44.gate_proj', '8.mlp.experts.44.up_proj', '8.mlp.experts.44.down_proj', '8.mlp.experts.45.gate_proj', '8.mlp.experts.45.up_proj', 
'8.mlp.experts.45.down_proj', '8.mlp.experts.48.gate_proj', '8.mlp.experts.48.up_proj', '8.mlp.experts.48.down_proj', '8.mlp.experts.37.gate_proj', '8.mlp.experts.37.up_proj', '8.mlp.experts.37.down_proj', '8.mlp.experts.61.gate_proj', '8.mlp.experts.61.up_proj', '8.mlp.experts.61.down_proj', '8.mlp.experts.26.gate_proj', '8.mlp.experts.26.up_proj', '8.mlp.experts.26.down_proj', '9.mlp.experts.39.gate_proj', '9.mlp.experts.39.up_proj', '9.mlp.experts.39.down_proj', '9.mlp.experts.11.gate_proj', '9.mlp.experts.11.up_proj', '9.mlp.experts.11.down_proj', '9.mlp.experts.6.gate_proj', '9.mlp.experts.6.up_proj', '9.mlp.experts.6.down_proj', '9.mlp.experts.47.gate_proj', '9.mlp.experts.47.up_proj', '9.mlp.experts.47.down_proj', '9.mlp.experts.7.gate_proj', '9.mlp.experts.7.up_proj', '9.mlp.experts.7.down_proj', '9.mlp.experts.12.gate_proj', '9.mlp.experts.12.up_proj', '9.mlp.experts.12.down_proj', '9.mlp.experts.33.gate_proj', '9.mlp.experts.33.up_proj', '9.mlp.experts.33.down_proj', '9.mlp.experts.31.gate_proj', '9.mlp.experts.31.up_proj', '9.mlp.experts.31.down_proj', '10.mlp.experts.48.gate_proj', '10.mlp.experts.48.up_proj', '10.mlp.experts.48.down_proj', '10.mlp.experts.29.gate_proj', '10.mlp.experts.29.up_proj', '10.mlp.experts.29.down_proj', '10.mlp.experts.59.gate_proj', '10.mlp.experts.59.up_proj', '10.mlp.experts.59.down_proj', '10.mlp.experts.43.gate_proj', '10.mlp.experts.43.up_proj', '10.mlp.experts.43.down_proj', '10.mlp.experts.41.gate_proj', '10.mlp.experts.41.up_proj', '10.mlp.experts.41.down_proj', '10.mlp.experts.44.gate_proj', '10.mlp.experts.44.up_proj', '10.mlp.experts.44.down_proj', '10.mlp.experts.3.gate_proj', '10.mlp.experts.3.up_proj', '10.mlp.experts.3.down_proj', '10.mlp.experts.23.gate_proj', '10.mlp.experts.23.up_proj', '10.mlp.experts.23.down_proj', '11.mlp.experts.20.gate_proj', '11.mlp.experts.20.up_proj', '11.mlp.experts.20.down_proj', '11.mlp.experts.62.gate_proj', '11.mlp.experts.62.up_proj', '11.mlp.experts.62.down_proj', 
'11.mlp.experts.23.gate_proj', '11.mlp.experts.23.up_proj', '11.mlp.experts.23.down_proj', '11.mlp.experts.51.gate_proj', '11.mlp.experts.51.up_proj', '11.mlp.experts.51.down_proj', '11.mlp.experts.14.gate_proj', '11.mlp.experts.14.up_proj', '11.mlp.experts.14.down_proj', '11.mlp.experts.54.gate_proj', '11.mlp.experts.54.up_proj', '11.mlp.experts.54.down_proj', '11.mlp.experts.47.gate_proj', '11.mlp.experts.47.up_proj', '11.mlp.experts.47.down_proj', '11.mlp.experts.15.gate_proj', '11.mlp.experts.15.up_proj', '11.mlp.experts.15.down_proj', '12.mlp.experts.60.gate_proj', '12.mlp.experts.60.up_proj', '12.mlp.experts.60.down_proj', '12.mlp.experts.23.gate_proj', '12.mlp.experts.23.up_proj', '12.mlp.experts.23.down_proj', '12.mlp.experts.47.gate_proj', '12.mlp.experts.47.up_proj', '12.mlp.experts.47.down_proj', '12.mlp.experts.10.gate_proj', '12.mlp.experts.10.up_proj', '12.mlp.experts.10.down_proj', '12.mlp.experts.53.gate_proj', '12.mlp.experts.53.up_proj', '12.mlp.experts.53.down_proj', '12.mlp.experts.29.gate_proj', '12.mlp.experts.29.up_proj', '12.mlp.experts.29.down_proj', '12.mlp.experts.59.gate_proj', '12.mlp.experts.59.up_proj', '12.mlp.experts.59.down_proj', '12.mlp.experts.6.gate_proj', '12.mlp.experts.6.up_proj', '12.mlp.experts.6.down_proj', '13.mlp.experts.51.gate_proj', '13.mlp.experts.51.up_proj', '13.mlp.experts.51.down_proj', '13.mlp.experts.5.gate_proj', '13.mlp.experts.5.up_proj', '13.mlp.experts.5.down_proj', '13.mlp.experts.62.gate_proj', '13.mlp.experts.62.up_proj', '13.mlp.experts.62.down_proj', '13.mlp.experts.31.gate_proj', '13.mlp.experts.31.up_proj', '13.mlp.experts.31.down_proj', '13.mlp.experts.28.gate_proj', '13.mlp.experts.28.up_proj', '13.mlp.experts.28.down_proj', '13.mlp.experts.4.gate_proj', '13.mlp.experts.4.up_proj', '13.mlp.experts.4.down_proj', '13.mlp.experts.10.gate_proj', '13.mlp.experts.10.up_proj', '13.mlp.experts.10.down_proj', '13.mlp.experts.25.gate_proj', '13.mlp.experts.25.up_proj', '13.mlp.experts.25.down_proj', 
'14.mlp.experts.58.gate_proj', '14.mlp.experts.58.up_proj', '14.mlp.experts.58.down_proj', '14.mlp.experts.54.gate_proj', '14.mlp.experts.54.up_proj', '14.mlp.experts.54.down_proj', '14.mlp.experts.57.gate_proj', '14.mlp.experts.57.up_proj', '14.mlp.experts.57.down_proj', '14.mlp.experts.9.gate_proj', '14.mlp.experts.9.up_proj', '14.mlp.experts.9.down_proj', '14.mlp.experts.22.gate_proj', '14.mlp.experts.22.up_proj', '14.mlp.experts.22.down_proj', '14.mlp.experts.47.gate_proj', '14.mlp.experts.47.up_proj', '14.mlp.experts.47.down_proj', '14.mlp.experts.7.gate_proj', '14.mlp.experts.7.up_proj', '14.mlp.experts.7.down_proj', '14.mlp.experts.29.gate_proj', '14.mlp.experts.29.up_proj', '14.mlp.experts.29.down_proj', '15.mlp.experts.24.gate_proj', '15.mlp.experts.24.up_proj', '15.mlp.experts.24.down_proj', '15.mlp.experts.8.gate_proj', '15.mlp.experts.8.up_proj', '15.mlp.experts.8.down_proj', '15.mlp.experts.9.gate_proj', '15.mlp.experts.9.up_proj', '15.mlp.experts.9.down_proj', '15.mlp.experts.57.gate_proj', '15.mlp.experts.57.up_proj', '15.mlp.experts.57.down_proj', '15.mlp.experts.63.gate_proj', '15.mlp.experts.63.up_proj', '15.mlp.experts.63.down_proj', '15.mlp.experts.30.gate_proj', '15.mlp.experts.30.up_proj', '15.mlp.experts.30.down_proj', '15.mlp.experts.58.gate_proj', '15.mlp.experts.58.up_proj', '15.mlp.experts.58.down_proj', '15.mlp.experts.31.gate_proj', '15.mlp.experts.31.up_proj', '15.mlp.experts.31.down_proj']
                #target_modules = ['q_proj','k_proj','v_proj','o_proj','gate','0.mlp.experts.6.gate_proj', '0.mlp.experts.6.up_proj', '0.mlp.experts.6.down_proj', '0.mlp.experts.61.gate_proj', '0.mlp.experts.61.up_proj', '0.mlp.experts.61.down_proj', '0.mlp.experts.24.gate_proj', '0.mlp.experts.24.up_proj', '0.mlp.experts.24.down_proj', '0.mlp.experts.63.gate_proj', '0.mlp.experts.63.up_proj', '0.mlp.experts.63.down_proj', '0.mlp.experts.33.gate_proj', '0.mlp.experts.33.up_proj', '0.mlp.experts.33.down_proj', '0.mlp.experts.57.gate_proj', '0.mlp.experts.57.up_proj', '0.mlp.experts.57.down_proj', '0.mlp.experts.47.gate_proj', '0.mlp.experts.47.up_proj', '0.mlp.experts.47.down_proj', '0.mlp.experts.22.gate_proj', '0.mlp.experts.22.up_proj', '0.mlp.experts.22.down_proj', '1.mlp.experts.47.gate_proj', '1.mlp.experts.47.up_proj', '1.mlp.experts.47.down_proj', '1.mlp.experts.60.gate_proj', '1.mlp.experts.60.up_proj', '1.mlp.experts.60.down_proj', '1.mlp.experts.11.gate_proj', '1.mlp.experts.11.up_proj', '1.mlp.experts.11.down_proj', '1.mlp.experts.41.gate_proj', '1.mlp.experts.41.up_proj', '1.mlp.experts.41.down_proj', '1.mlp.experts.51.gate_proj', '1.mlp.experts.51.up_proj', '1.mlp.experts.51.down_proj', '1.mlp.experts.7.gate_proj', '1.mlp.experts.7.up_proj', '1.mlp.experts.7.down_proj', '1.mlp.experts.29.gate_proj', '1.mlp.experts.29.up_proj', '1.mlp.experts.29.down_proj', '1.mlp.experts.12.gate_proj', '1.mlp.experts.12.up_proj', '1.mlp.experts.12.down_proj', '2.mlp.experts.8.gate_proj', '2.mlp.experts.8.up_proj', '2.mlp.experts.8.down_proj', '2.mlp.experts.9.gate_proj', '2.mlp.experts.9.up_proj', '2.mlp.experts.9.down_proj', '2.mlp.experts.46.gate_proj', '2.mlp.experts.46.up_proj', '2.mlp.experts.46.down_proj', '2.mlp.experts.29.gate_proj', '2.mlp.experts.29.up_proj', '2.mlp.experts.29.down_proj', '2.mlp.experts.4.gate_proj', '2.mlp.experts.4.up_proj', '2.mlp.experts.4.down_proj', '2.mlp.experts.28.gate_proj', '2.mlp.experts.28.up_proj', '2.mlp.experts.28.down_proj', 
'2.mlp.experts.60.gate_proj', '2.mlp.experts.60.up_proj', '2.mlp.experts.60.down_proj', '2.mlp.experts.58.gate_proj', '2.mlp.experts.58.up_proj', '2.mlp.experts.58.down_proj', '3.mlp.experts.15.gate_proj', '3.mlp.experts.15.up_proj', '3.mlp.experts.15.down_proj', '3.mlp.experts.56.gate_proj', '3.mlp.experts.56.up_proj', '3.mlp.experts.56.down_proj', '3.mlp.experts.33.gate_proj', '3.mlp.experts.33.up_proj', '3.mlp.experts.33.down_proj', '3.mlp.experts.4.gate_proj', '3.mlp.experts.4.up_proj', '3.mlp.experts.4.down_proj', '3.mlp.experts.62.gate_proj', '3.mlp.experts.62.up_proj', '3.mlp.experts.62.down_proj', '3.mlp.experts.51.gate_proj', '3.mlp.experts.51.up_proj', '3.mlp.experts.51.down_proj', '3.mlp.experts.6.gate_proj', '3.mlp.experts.6.up_proj', '3.mlp.experts.6.down_proj', '3.mlp.experts.3.gate_proj', '3.mlp.experts.3.up_proj', '3.mlp.experts.3.down_proj', '4.mlp.experts.2.gate_proj', '4.mlp.experts.2.up_proj', '4.mlp.experts.2.down_proj', '4.mlp.experts.27.gate_proj', '4.mlp.experts.27.up_proj', '4.mlp.experts.27.down_proj', '4.mlp.experts.33.gate_proj', '4.mlp.experts.33.up_proj', '4.mlp.experts.33.down_proj', '4.mlp.experts.63.gate_proj', '4.mlp.experts.63.up_proj', '4.mlp.experts.63.down_proj', '4.mlp.experts.25.gate_proj', '4.mlp.experts.25.up_proj', '4.mlp.experts.25.down_proj', '4.mlp.experts.41.gate_proj', '4.mlp.experts.41.up_proj', '4.mlp.experts.41.down_proj', '4.mlp.experts.51.gate_proj', '4.mlp.experts.51.up_proj', '4.mlp.experts.51.down_proj', '4.mlp.experts.48.gate_proj', '4.mlp.experts.48.up_proj', '4.mlp.experts.48.down_proj', '5.mlp.experts.31.gate_proj', '5.mlp.experts.31.up_proj', '5.mlp.experts.31.down_proj', '5.mlp.experts.55.gate_proj', '5.mlp.experts.55.up_proj', '5.mlp.experts.55.down_proj', '5.mlp.experts.17.gate_proj', '5.mlp.experts.17.up_proj', '5.mlp.experts.17.down_proj', '5.mlp.experts.13.gate_proj', '5.mlp.experts.13.up_proj', '5.mlp.experts.13.down_proj', '5.mlp.experts.26.gate_proj', '5.mlp.experts.26.up_proj', 
'5.mlp.experts.26.down_proj', '5.mlp.experts.2.gate_proj', '5.mlp.experts.2.up_proj', '5.mlp.experts.2.down_proj', '5.mlp.experts.15.gate_proj', '5.mlp.experts.15.up_proj', '5.mlp.experts.15.down_proj', '5.mlp.experts.11.gate_proj', '5.mlp.experts.11.up_proj', '5.mlp.experts.11.down_proj', '6.mlp.experts.40.gate_proj', '6.mlp.experts.40.up_proj', '6.mlp.experts.40.down_proj', '6.mlp.experts.59.gate_proj', '6.mlp.experts.59.up_proj', '6.mlp.experts.59.down_proj', '6.mlp.experts.49.gate_proj', '6.mlp.experts.49.up_proj', '6.mlp.experts.49.down_proj', '6.mlp.experts.17.gate_proj', '6.mlp.experts.17.up_proj', '6.mlp.experts.17.down_proj', '6.mlp.experts.22.gate_proj', '6.mlp.experts.22.up_proj', '6.mlp.experts.22.down_proj', '6.mlp.experts.23.gate_proj', '6.mlp.experts.23.up_proj', '6.mlp.experts.23.down_proj', '6.mlp.experts.36.gate_proj', '6.mlp.experts.36.up_proj', '6.mlp.experts.36.down_proj', '6.mlp.experts.38.gate_proj', '6.mlp.experts.38.up_proj', '6.mlp.experts.38.down_proj', '7.mlp.experts.35.gate_proj', '7.mlp.experts.35.up_proj', '7.mlp.experts.35.down_proj', '7.mlp.experts.21.gate_proj', '7.mlp.experts.21.up_proj', '7.mlp.experts.21.down_proj', '7.mlp.experts.45.gate_proj', '7.mlp.experts.45.up_proj', '7.mlp.experts.45.down_proj', '7.mlp.experts.13.gate_proj', '7.mlp.experts.13.up_proj', '7.mlp.experts.13.down_proj', '7.mlp.experts.2.gate_proj', '7.mlp.experts.2.up_proj', '7.mlp.experts.2.down_proj', '7.mlp.experts.39.gate_proj', '7.mlp.experts.39.up_proj', '7.mlp.experts.39.down_proj', '7.mlp.experts.22.gate_proj', '7.mlp.experts.22.up_proj', '7.mlp.experts.22.down_proj', '7.mlp.experts.33.gate_proj', '7.mlp.experts.33.up_proj', '7.mlp.experts.33.down_proj', '8.mlp.experts.32.gate_proj', '8.mlp.experts.32.up_proj', '8.mlp.experts.32.down_proj', '8.mlp.experts.6.gate_proj', '8.mlp.experts.6.up_proj', '8.mlp.experts.6.down_proj', '8.mlp.experts.44.gate_proj', '8.mlp.experts.44.up_proj', '8.mlp.experts.44.down_proj', '8.mlp.experts.45.gate_proj', 
'8.mlp.experts.45.up_proj', '8.mlp.experts.45.down_proj', '8.mlp.experts.48.gate_proj', '8.mlp.experts.48.up_proj', '8.mlp.experts.48.down_proj', '8.mlp.experts.61.gate_proj', '8.mlp.experts.61.up_proj', '8.mlp.experts.61.down_proj', '8.mlp.experts.37.gate_proj', '8.mlp.experts.37.up_proj', '8.mlp.experts.37.down_proj', '8.mlp.experts.38.gate_proj', '8.mlp.experts.38.up_proj', '8.mlp.experts.38.down_proj', '9.mlp.experts.39.gate_proj', '9.mlp.experts.39.up_proj', '9.mlp.experts.39.down_proj', '9.mlp.experts.11.gate_proj', '9.mlp.experts.11.up_proj', '9.mlp.experts.11.down_proj', '9.mlp.experts.7.gate_proj', '9.mlp.experts.7.up_proj', '9.mlp.experts.7.down_proj', '9.mlp.experts.6.gate_proj', '9.mlp.experts.6.up_proj', '9.mlp.experts.6.down_proj', '9.mlp.experts.47.gate_proj', '9.mlp.experts.47.up_proj', '9.mlp.experts.47.down_proj', '9.mlp.experts.33.gate_proj', '9.mlp.experts.33.up_proj', '9.mlp.experts.33.down_proj', '9.mlp.experts.31.gate_proj', '9.mlp.experts.31.up_proj', '9.mlp.experts.31.down_proj', '9.mlp.experts.12.gate_proj', '9.mlp.experts.12.up_proj', '9.mlp.experts.12.down_proj', '10.mlp.experts.29.gate_proj', '10.mlp.experts.29.up_proj', '10.mlp.experts.29.down_proj', '10.mlp.experts.48.gate_proj', '10.mlp.experts.48.up_proj', '10.mlp.experts.48.down_proj', '10.mlp.experts.43.gate_proj', '10.mlp.experts.43.up_proj', '10.mlp.experts.43.down_proj', '10.mlp.experts.41.gate_proj', '10.mlp.experts.41.up_proj', '10.mlp.experts.41.down_proj', '10.mlp.experts.3.gate_proj', '10.mlp.experts.3.up_proj', '10.mlp.experts.3.down_proj', '10.mlp.experts.59.gate_proj', '10.mlp.experts.59.up_proj', '10.mlp.experts.59.down_proj', '10.mlp.experts.61.gate_proj', '10.mlp.experts.61.up_proj', '10.mlp.experts.61.down_proj', '10.mlp.experts.23.gate_proj', '10.mlp.experts.23.up_proj', '10.mlp.experts.23.down_proj', '11.mlp.experts.20.gate_proj', '11.mlp.experts.20.up_proj', '11.mlp.experts.20.down_proj', '11.mlp.experts.14.gate_proj', '11.mlp.experts.14.up_proj', 
'11.mlp.experts.14.down_proj', '11.mlp.experts.62.gate_proj', '11.mlp.experts.62.up_proj', '11.mlp.experts.62.down_proj', '11.mlp.experts.23.gate_proj', '11.mlp.experts.23.up_proj', '11.mlp.experts.23.down_proj', '11.mlp.experts.35.gate_proj', '11.mlp.experts.35.up_proj', '11.mlp.experts.35.down_proj', '11.mlp.experts.15.gate_proj', '11.mlp.experts.15.up_proj', '11.mlp.experts.15.down_proj', '11.mlp.experts.27.gate_proj', '11.mlp.experts.27.up_proj', '11.mlp.experts.27.down_proj', '11.mlp.experts.47.gate_proj', '11.mlp.experts.47.up_proj', '11.mlp.experts.47.down_proj', '12.mlp.experts.60.gate_proj', '12.mlp.experts.60.up_proj', '12.mlp.experts.60.down_proj', '12.mlp.experts.47.gate_proj', '12.mlp.experts.47.up_proj', '12.mlp.experts.47.down_proj', '12.mlp.experts.10.gate_proj', '12.mlp.experts.10.up_proj', '12.mlp.experts.10.down_proj', '12.mlp.experts.29.gate_proj', '12.mlp.experts.29.up_proj', '12.mlp.experts.29.down_proj', '12.mlp.experts.23.gate_proj', '12.mlp.experts.23.up_proj', '12.mlp.experts.23.down_proj', '12.mlp.experts.28.gate_proj', '12.mlp.experts.28.up_proj', '12.mlp.experts.28.down_proj', '12.mlp.experts.8.gate_proj', '12.mlp.experts.8.up_proj', '12.mlp.experts.8.down_proj', '12.mlp.experts.53.gate_proj', '12.mlp.experts.53.up_proj', '12.mlp.experts.53.down_proj', '13.mlp.experts.51.gate_proj', '13.mlp.experts.51.up_proj', '13.mlp.experts.51.down_proj', '13.mlp.experts.5.gate_proj', '13.mlp.experts.5.up_proj', '13.mlp.experts.5.down_proj', '13.mlp.experts.62.gate_proj', '13.mlp.experts.62.up_proj', '13.mlp.experts.62.down_proj', '13.mlp.experts.28.gate_proj', '13.mlp.experts.28.up_proj', '13.mlp.experts.28.down_proj', '13.mlp.experts.31.gate_proj', '13.mlp.experts.31.up_proj', '13.mlp.experts.31.down_proj', '13.mlp.experts.2.gate_proj', '13.mlp.experts.2.up_proj', '13.mlp.experts.2.down_proj', '13.mlp.experts.10.gate_proj', '13.mlp.experts.10.up_proj', '13.mlp.experts.10.down_proj', '13.mlp.experts.54.gate_proj', '13.mlp.experts.54.up_proj', 
'13.mlp.experts.54.down_proj', '14.mlp.experts.54.gate_proj', '14.mlp.experts.54.up_proj', '14.mlp.experts.54.down_proj', '14.mlp.experts.58.gate_proj', '14.mlp.experts.58.up_proj', '14.mlp.experts.58.down_proj', '14.mlp.experts.57.gate_proj', '14.mlp.experts.57.up_proj', '14.mlp.experts.57.down_proj', '14.mlp.experts.7.gate_proj', '14.mlp.experts.7.up_proj', '14.mlp.experts.7.down_proj', '14.mlp.experts.9.gate_proj', '14.mlp.experts.9.up_proj', '14.mlp.experts.9.down_proj', '14.mlp.experts.22.gate_proj', '14.mlp.experts.22.up_proj', '14.mlp.experts.22.down_proj', '14.mlp.experts.29.gate_proj', '14.mlp.experts.29.up_proj', '14.mlp.experts.29.down_proj', '14.mlp.experts.19.gate_proj', '14.mlp.experts.19.up_proj', '14.mlp.experts.19.down_proj', '15.mlp.experts.24.gate_proj', '15.mlp.experts.24.up_proj', '15.mlp.experts.24.down_proj', '15.mlp.experts.8.gate_proj', '15.mlp.experts.8.up_proj', '15.mlp.experts.8.down_proj', '15.mlp.experts.22.gate_proj', '15.mlp.experts.22.up_proj', '15.mlp.experts.22.down_proj', '15.mlp.experts.63.gate_proj', '15.mlp.experts.63.up_proj', '15.mlp.experts.63.down_proj', '15.mlp.experts.12.gate_proj', '15.mlp.experts.12.up_proj', '15.mlp.experts.12.down_proj', '15.mlp.experts.57.gate_proj', '15.mlp.experts.57.up_proj', '15.mlp.experts.57.down_proj', '15.mlp.experts.58.gate_proj', '15.mlp.experts.58.up_proj', '15.mlp.experts.58.down_proj', '15.mlp.experts.30.gate_proj', '15.mlp.experts.30.up_proj', '15.mlp.experts.30.down_proj']
                )
        policy = get_peft_model(policy, peft_config)
    #     # Check whether name contains the digits 1, 2, 3, or 7, i.e. the high-frequency experts
    #     for name, param in policy.named_parameters():
    #         #has_digit = any(d in name for d in ['experts.1', 'experts.2', 'experts.3', 'experts.7'])
    #         # if ('lora_A' in name) and has_digit:
    #         #     # print("include name",name)
    #         #     param.requires_grad = False
    #         if 'lora_A' in name:
    #             param.requires_grad = False
    # # #     #Print the trainable parameters
    # else:
    #     policy = PeftModel.from_pretrained(policy, config.model.archive)
    #     print('loading from archive', config.model.archive)
    #     for name, param in policy.named_parameters():
    #         if 'lora_B' in name:
    #             param.requires_grad = True
        # #Print the trainable parameters
        policy.print_trainable_parameters()

    print('Number of trainable parameters:', sum(p.numel() for p in policy.parameters() if p.requires_grad))
    #policy.print_trainable_parameters()    
    disable_dropout(policy)

    if config.loss.name in ['dpo', 'soft_sft']:
        print('building reference model')
        reference_model_dtype = getattr(torch, config.model.reference_dtype)
        reference_model = transformers.AutoModelForCausalLM.from_pretrained(load_path, use_cache=False, 
                            low_cpu_mem_usage=True, torch_dtype=reference_model_dtype, **model_kwargs)
        disable_dropout(reference_model)
    else:
        reference_model = None
            
    if 'FSDP' in config.trainer:
        world_size = torch.cuda.device_count()
        print('starting', world_size, 'processes for FSDP training')
        mp.spawn(worker_main, nprocs=world_size, args=(world_size, config, policy, reference_model), join=True)
        print("finish lora/lori adapter training.")
    else:
        print('starting single-process worker')
        worker_main(0, 1, config, policy, reference_model)

# Script entry point: call main() only when this file is executed directly,
# not when it is imported as a module.
if __name__ == '__main__':
    main()